{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "data = pd.read_csv('sp500.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.iloc[:,1:6]\n",
    "data = data.values.astype(float)\n",
    "data = pd.DataFrame(data)\n",
    "data = data.apply(lambda x: (x - np.mean(x)) / (np.max(x) - np.min(x)))\n",
    "print(data[:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以将上述读取股票数据的代码用一个函数表示："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "def read_stock(filename,normalize = True):    \n",
    "    data = pd.read_csv(filename)\n",
    "    data = data.iloc[:,1:6]\n",
    "    data = data.values.astype(float)\n",
    "    data = pd.DataFrame(data)   \n",
    "    if normalize:\n",
    "        data = data.apply(lambda x: (x - np.mean(x)) / (np.max(x) - np.min(x)))\n",
    "        return data\n",
    "\n",
    "data = read_stock('sp500.csv')\n",
    "print(data[:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面的代码绘制收盘价的曲线："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "x = np.array(data.iloc[:,-2])\n",
    "print(x.shape)\n",
    "plt.plot(x)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.1.4  生成自回归数据\n",
    "\n",
    "下面的代码通过正弦函数和余弦函数的组合成一个函数，然后采样该函数曲线的y坐标值构成一个序列数据："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "def gen_seq_data_from_function(f,ts):\n",
    "    return f(ts)\n",
    "\n",
    "T  =5000\n",
    "x = gen_seq_data_from_function(lambda ts:np.sin(ts*0.1)+np.cos(ts*0.2),\\\n",
    "                               np.arange(0, T))\n",
    "plt.plot(x[:500])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "根据公式：\n",
    "$$x_t = a_0+a_1 x_{t-1}+\\cdots+ a_{\\tau}x_{t-\\tau} + \\epsilon \\tag{7-4}$$\n",
    "生存自回归数据。\n",
    "\n",
    "对自回归模型的研究表明，只有当系数构成的方程$x^{\\tau}-a_0x^{\\tau-1}-a_1x^{\\tau-2}-\\cdots-a_{\\tau}$的根的绝对值不超过1时，这个自回归模型才是稳定的，否则，生成的数据是不稳定的。\n",
    "\n",
    "函数init_coefficients()生成稳定的自回归模型的系数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(5)\n",
    "def init_coefficients(n):\n",
    "    while True:\n",
    "        a = np.random.random(n) - 0.5\n",
    "        coefficients = np.append(1, -a)       \n",
    "        if np.max(np.abs(np.roots(coefficients))) < 1:\n",
    "            return a\n",
    "init_coefficients(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_data(n,data_n,noise_value = 1,k=3):   \n",
    "    a = init_coefficients(n+1)   \n",
    "    x = np.zeros(data_n + n*(k+1))\n",
    "    x_noise = np.zeros(data_n + n*(k+1))\n",
    "    x_noise[:n]= np.random.randn(n)   \n",
    "    print(len(a))\n",
    "    \n",
    "    n_all = data_n + n*k\n",
    "    for i in range(n_all):        \n",
    "        x[n+i] = np.dot(x_noise[i:n+i][::-1], a[1:]) +a[0] \n",
    "        x_noise[n+i] = x[n+i] + noise_value * np.random.randn() \n",
    "       \n",
    "    x_noise = x_noise[k*n:] #舍弃前面的k*n个实数\n",
    "    x = x[k*n:]\n",
    "    return x_noise,x\n",
    "\n",
    "x,x_noise = generate_data(5,100)\n",
    "plt.plot(x[:80])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.1.6  时间窗采样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(0)\n",
    "def gen_data_set(x,T,percentage = 0.9):\n",
    "    L = T + 1\n",
    "    data_set = []\n",
    "    for i in range(len(x) - (T+1)):\n",
    "        data_set.append(x[i: i + T+1])\n",
    "    data_set = np.array(data_set)\n",
    "    row = round(percentage * data_set.shape[0])\n",
    "    train = data_set[:int(row), :]\n",
    "    np.random.shuffle(train)\n",
    "    x_train = train[:, :-1]\n",
    "    y_train = train[:, -1]\n",
    "    x_test = data_set[int(row):, :-1]\n",
    "    y_test = data_set[int(row):, -1]\n",
    "    return [x_train, y_train, x_test, y_test]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = gen_seq_data_from_function(lambda ts:np.sin(ts*0.1)+np.cos(ts*0.2),\\\n",
    "                               np.arange(0, 5000))\n",
    "x_train, y_train, x_test, y_test = gen_data_set(x, 50)\n",
    "\n",
    "y_train = y_train.reshape(-1,1)\n",
    "print(x_train.shape,y_train.shape)\n",
    "print(x_test.shape,y_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.1.7   时间窗方法建模和训练\n",
    "\n",
    "从自回归序列数据按照固定时间窗采样得到训练样本，就可以用监督式学习模型对其进行建模与训练。下面的代码用一个2层全连接神经网络对上述从函数值采样的自回归数据进行建模与训练："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from NeuralNetwork import *\n",
    "from train import *\n",
    "import util\n",
    "\n",
    "hidden_dim = 50\n",
    "n = x_train.shape[1]\n",
    "print(\"n\",n)\n",
    "nn = NeuralNetwork()\n",
    "nn.add_layer(Dense(n, hidden_dim)) #('xavier',0.01)))\n",
    "nn.add_layer(Relu())\n",
    "nn.add_layer(Dense(hidden_dim, 1)) #('xavier',0.01)))\n",
    "\n",
    "learning_rate = 1e-2\n",
    "momentum = 0.8 #0.9\n",
    "optimizer = SGD(nn.parameters(),learning_rate,momentum)\n",
    "\n",
    "epochs=20\n",
    "batch_size = 200 # len(train_x) #200   \n",
    "reg = 1e-1\n",
    "print_n=100\n",
    "\n",
    "losses = train_nn(nn,x_train,y_train,optimizer,\n",
    "                  util.mse_loss_grad,epochs,batch_size,reg,print_n)\n",
    "#print(losses[::len(losses)//50])\n",
    "plt.plot(losses)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.1.8   长期预测和短期预测\n",
    "\n",
    "##### 长期预测\n",
    "\n",
    "从初始时刻的真实数据样本预测后续一系列时刻的数据。并将这些预测值与测试集对应的目标值进行可视化比较。以观察这个模型的预测性能。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x_test[0].copy()\n",
    "x = x.reshape(1,-1)\n",
    "ys =[]\n",
    "for i in range(400):\n",
    "    y = nn.forward(x)   \n",
    "    ys.append(y[0][0])\n",
    "    x = np.delete(x,0,1)\n",
    "    x = np.append(x, y.reshape(1,-1), axis=1)    \n",
    "ys  = ys[:]\n",
    "plt.plot(ys[:400])\n",
    "plt.plot(y_test[:400])\n",
    "plt.xlabel(\"time\")\n",
    "plt.ylabel(\"value\")\n",
    "plt.legend(['y','y_real'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 短期预测\n",
    "\n",
    "下面的代码是用训练的神经网络进行短期预测，即每次都是用真实数据预测其下一个时刻的数据值："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ys =[]\n",
    "for i in range(400):\n",
    "    x = x_test[i].copy()\n",
    "    x = x.reshape(1,-1)\n",
    "    y = nn.forward(x)   \n",
    "    ys.append(y[0][0])   \n",
    "ys  = ys[:]\n",
    "plt.plot(ys[:400])\n",
    "plt.plot(y_test[:400])\n",
    "plt.xlabel(\"time\")\n",
    "plt.ylabel(\"value\")\n",
    "plt.legend(['y','y_real'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.1.9 股票价格预测 \n",
    "下面代码用时间窗为100生成训练数据集和测试数据集，即用前100天的价格预测后一天的价格"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = read_stock('sp500.csv')\n",
    "x = np.array(data.iloc[:,-2])\n",
    "print(x.shape)\n",
    "x = x.reshape(-1,1)\n",
    "print(x.shape)\n",
    "\n",
    "x_train, y_train, x_test, y_test = gen_data_set(x, 100)\n",
    "y_train = y_train.reshape(-1,1)\n",
    "print(x_train.shape,y_train.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "用训练集训练一个神经网络模型："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from NeuralNetwork import *\n",
    "import util\n",
    "\n",
    "hidden_dim = 500\n",
    "n = x_train.shape[1]\n",
    "print(\"n\",n)\n",
    "nn = NeuralNetwork()\n",
    "nn.add_layer(Dense(n, hidden_dim))\n",
    "nn.add_layer(Relu())\n",
    "nn.add_layer(Dense(hidden_dim, 1)) \n",
    "\n",
    "learning_rate = 0.1\n",
    "momentum = 0.8 #0.9\n",
    "optimizer = SGD(nn.parameters(),learning_rate,momentum)\n",
    "\n",
    "epochs=60\n",
    "batch_size = 500 # len(train_x) #200   \n",
    "reg = 1e-6\n",
    "print_n=50\n",
    "\n",
    "losses = train_nn(nn,x_train,y_train,optimizer,util.mse_loss_grad,epochs,batch_size,reg,print_n)\n",
    "plt.plot(losses)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "用测试集的第一个样本作为开始，进行长期预测，即不断用预测值构建新的数据特征去预测下一天的股票价格："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x_test[0].copy()\n",
    "x = x.reshape(1,-1)\n",
    "ys =[]\n",
    "\n",
    "num = 400\n",
    "for i in range(num):\n",
    "    y = nn.forward(x)    \n",
    "    ys.append(y[0][0])\n",
    "    x = np.delete(x,0,1)\n",
    "    x = np.append(x, y.reshape(1,-1), axis=1)    \n",
    "ys  = ys[:]\n",
    "plt.plot(ys[:num])\n",
    "plt.plot(y_test[:num])\n",
    "plt.xlabel(\"time\")\n",
    "plt.ylabel(\"value\")\n",
    "plt.legend(['y','y_real'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ys =[]\n",
    "num = 400\n",
    "for i in range(num):\n",
    "    x = x_test[i].copy()\n",
    "    x = x.reshape(1,-1)\n",
    "    y = nn.forward(x) \n",
    "    ys.append(y[0][0])\n",
    "    x = np.delete(x,0,1)\n",
    "    x = np.append(x, y.reshape(1,-1), axis=1)    \n",
    "ys  = ys[:]\n",
    "plt.plot(ys[:num])\n",
    "plt.plot(y_test[:num])\n",
    "plt.xlabel(\"time\")\n",
    "plt.ylabel(\"value\")\n",
    "plt.legend(['y','y_real'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.4 单层循环神经网络的实现\n",
    "### 7.4.1 初始化模型参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.random.seed(1)\n",
    "def rnn_params_init(input_dim, hidden_dim,output_dim,scale = 0.01):        \n",
    "    Wx = np.random.randn(input_dim, hidden_dim)*scale # input to hidden\n",
    "    Wh = np.random.randn(hidden_dim, hidden_dim)*scale # hidden to hidden\n",
    "    bh = np.zeros((1,hidden_dim)) # hidden bias\n",
    "\n",
    "    Wf = np.random.randn(hidden_dim, output_dim)*scale # hidden to output        \n",
    "    bf = np.zeros((1,output_dim)) # output bias\n",
    "\n",
    "    return [Wx,Wh,bh,Wf,bf]  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_hidden_state_init(batch_dim, hidden_dim):\n",
    "    return np.zeros((batch_dim,hidden_dim))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.4.2 正向计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_forward(params,Xs, H_):\n",
    "    Wx, Wh, bh, Wf, bf = params\n",
    "    H = H_ #np.copy(H_)   \n",
    "   \n",
    "    Fs = []        \n",
    "    Hs = {}      \n",
    "    Hs[-1] = np.copy(H)    \n",
    " \n",
    "    for t  in range(len(Xs)):\n",
    "        X = Xs[t]       \n",
    "        H = np.tanh(np.dot(X, Wx) + np.dot(H, Wh) + bh)\n",
    "        F = np.dot(H, Wf) + bf       \n",
    "\n",
    "        Fs.append(F)\n",
    "        Hs[t] = H\n",
    "    return Fs, Hs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_forward_step(params,X, preH):\n",
    "    Wx, Wh, bh, Wf, bf = params     \n",
    "    H = np.tanh(np.dot(X, Wx) + np.dot(preH, Wh) + bh)\n",
    "    F = np.dot(H, Wf) + bf \n",
    "    return F, H"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_forward_(params,Xs, H_):\n",
    "    Wx, Wh, bh, Wf, bf = params\n",
    "    H = H_  \n",
    "   \n",
    "    Fs = []        \n",
    "    Hs = {}      \n",
    "    Hs[-1] = np.copy(H)    \n",
    " \n",
    "    for t  in range(len(Xs)):\n",
    "        X = Xs[t]       \n",
    "        F,H = rnn_forward_step(params,X,H)       \n",
    "        Fs.append(F)\n",
    "        Hs[t] = H\n",
    "    return Fs, Hs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.4.3 损失函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import util\n",
    "def rnn_loss_grad(Fs,Ys,loss_fn = util.cross_entropy_grad_loss,flatten = True):   \n",
    "    loss = 0\n",
    "    dFs = {}\n",
    "   \n",
    "    for t in range(len(Fs)):\n",
    "        F = Fs[t]\n",
    "        Y = Ys[t]   \n",
    "        if flatten and Y.ndim>=2:          \n",
    "            Y = Y.flatten()\n",
    "        loss_t,dF_t = loss_fn(F,Y)\n",
    "        loss += loss_t        \n",
    "        dFs[t] = dF_t\n",
    "       \n",
    "    return loss,dFs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.4.4 反向求导"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "def grad_clipping(grads,alpha):\n",
    "    norm = math.sqrt(sum((grad ** 2).sum() for grad in grads))\n",
    "    if norm > alpha:\n",
    "        ratio = alpha / norm\n",
    "        for i in range(len(grads)):\n",
    "            grads[i]*=ratio \n",
    "            \n",
    "def rnn_backward(params,Xs,Hs,dZs,clip_value = 5.): # Ys,loss_function):    \n",
    "    Wx, Wh,bh, Wf,bf = params\n",
    "    dWx, dWh, dWf = np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(Wf)\n",
    "    dbh, dbf = np.zeros_like(bh), np.zeros_like(bf)       \n",
    "\n",
    "    dh_next = np.zeros_like(Hs[0])\n",
    "    h = Hs\n",
    "    x = Xs\n",
    "    \n",
    "    T = len(Xs)  #序列长度（时刻长度）  \n",
    "    for t in reversed(range(T)): \n",
    "        dZ = dZs[t]        \n",
    "        \n",
    "        dWf += np.dot(h[t].T,dZ)\n",
    "    \n",
    "        dbf += np.sum(dZ, axis=0, keepdims=True)         \n",
    "        dh = np.dot(dZ, Wf.T) + dh_next \n",
    "        dZh = (1 - h[t] * h[t]) * dh \n",
    "        \n",
    "        dbh += np.sum(dZh, axis=0, keepdims=True) \n",
    "        dWx += np.dot(x[t].T,dZh)\n",
    "        dWh += np.dot(h[t-1].T,dZh)\n",
    "        dh_next = np.dot(dZh,Wh.T)\n",
    "   \n",
    "    grads =  [dWx, dWh, dbh,dWf, dbf]\n",
    "    if clip_value is not None:\n",
    "        grad_clipping(grads,clip_value)\n",
    "    return grads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_backward_step(params,dZ,X,H,H_,dh_next): \n",
    "    Wx, Wh,bh, Wf,bf = params\n",
    "    dWf = np.dot(H.T,dZ)\n",
    "\n",
    "    dbf = np.sum(dZ, axis=0, keepdims=True)         \n",
    "    dh = np.dot(dZ, Wf.T) + dh_next \n",
    "    dZh = (1 - H * H) * dh \n",
    "\n",
    "    dbh = np.sum(dZh, axis=0, keepdims=True) \n",
    "    dWx = np.dot(X.T,dZh)\n",
    "    dWh = np.dot(H_.T,dZh)\n",
    "    dh_next = np.dot(dZh,Wh.T)\n",
    "    return dWx, dWh,dbh, dWf,dbf,dh_next"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_backward_(params,Xs,Hs,dZs,clip_value = 5.): \n",
    "    Wx, Wh,bh, Wf,bf = params\n",
    "    dWx, dWh, dWf = np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(Wf)\n",
    "    dbh, dbf = np.zeros_like(bh), np.zeros_like(bf)\n",
    "    dh_next = np.zeros_like(Hs[0])\n",
    "    \n",
    "    T = len(Xs)  #序列长度（时刻长度）  \n",
    "    for t in reversed(range(T)):  \n",
    "        dZ = dZs[t] \n",
    "        H= Hs[t]\n",
    "        H_ = Hs[t-1]\n",
    "        X = Xs[t]\n",
    "        \n",
    "        dWx_,dWh_,dbh_,dWf_,dbf_,dh_next = rnn_backward_step(params,dZ,X,H,H_,dh_next)\n",
    "        for grad,grad_t in zip([dWx, dWh,dbh, dWf,dbf],[dWx_,dWh_,dbh_,dWf_,dbf_]):\n",
    "            grad+=grad_t      \n",
    "\n",
    "    grads =  [dWx, dWh, dbh,dWf, dbf]\n",
    "    if clip_value is not None:\n",
    "    \tgrad_clipping(grads,clip_value)\n",
    "    return grads   "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.4.5  梯度验证"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.random.seed(1)\n",
    "\n",
    "#生成4个时刻，每批有2个样本的一批样本Xs及目标\n",
    "#定义一个输入、隐含层、输出层的大小分别是4、10、4的RNN模型\n",
    "if True:\n",
    "    T  = 5\n",
    "    input_dim, hidden_dim,output_dim = 4,10,4\n",
    "    batch_size = 1\n",
    "    seq_len = 5\n",
    "    Xs = np.random.rand(seq_len,batch_size,input_dim)\n",
    "    #Ys = np.random.randint(input_dim,size = (seq_len,batch_size,output_dim))\n",
    "    Ys = np.random.randint(input_dim,size = (seq_len,batch_size))\n",
    "    \n",
    "print(Xs)\n",
    "print(Ys)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面代码计算上述样本的分析梯度："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --------cheack gradient-------------   \n",
    "params = rnn_params_init(input_dim, hidden_dim,output_dim)\n",
    "H_0 = rnn_hidden_state_init(batch_size,hidden_dim)\n",
    "\n",
    "Fs,Hs = rnn_forward(params,Xs,H_0) \n",
    "loss_function = rnn_loss_grad\n",
    "print(Fs[0].shape,Ys[0].shape)\n",
    "loss,dFs = loss_function(Fs,Ys)  \n",
    "grads = rnn_backward(params,Xs,Hs,dFs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面代码定义了计算RNN损失的辅助函数rnn_loss()，然后调用util中的通用数值梯度函数numerical_gradient()计算RNN模型参数的数值梯度，并和上面的分析梯度进行误差比较，同时也输出了第一个模型参数的梯度："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_loss():\n",
    "    H_0 = np.zeros((1,hidden_dim))\n",
    "    H = np.copy(H_0)\n",
    "    Fs,Hs = rnn_forward(params,Xs,H) \n",
    "    loss_function = rnn_loss_grad\n",
    "    loss,dFs = loss_function(Fs,Ys)     \n",
    "    return loss\n",
    "\n",
    "\n",
    "numerical_grads = util.numerical_gradient(rnn_loss,params,1e-6) #rnn_numerical_gradient(rnn_loss,params,1e-10)\n",
    "#diff_error = lambda x, y: np.max(np.abs(x - y)) \n",
    "diff_error = lambda x, y: np.max( np.abs(x - y)/(np.maximum(1e-8, np.abs(x) + np.abs(y))))\n",
    "\n",
    "print(\"loss\",loss)\n",
    "print(\"[dWx, dWh, dbh,dWf, dbf]\")\n",
    "for i in range(len(grads)):\n",
    "    print(diff_error(grads[i],numerical_grads[i]))\n",
    "\n",
    "print(\"grads\",grads[1][:2])\n",
    "print(\"numerical_grads\",numerical_grads[1][:2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.4.6 梯度下降训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SGD():\n",
    "    def __init__(self,model_params,learning_rate=0.01, momentum=0.9):\n",
    "        self.params,self.lr,self.momentum = model_params,learning_rate,momentum\n",
    "        self.vs = []\n",
    "        for p in self.params:\n",
    "            v = np.zeros_like(p)\n",
    "            self.vs.append(v)   \n",
    "                \n",
    "    def step(self,grads): \n",
    "        for i in range(len(self.params)):   \n",
    "            grad = grads[i]           \n",
    "            self.vs[i] = self.momentum*self.vs[i]+self.lr* grad             \n",
    "            self.params[i] -= self.vs[i]\n",
    "\n",
    "    def scale_learning_rate(self,scale):\n",
    "        self.lr *= scale"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "当然，也可以其他的参数优化器，如AdaGrad优化器："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AdaGrad():\n",
    "    def __init__(self,model_params,learning_rate=0.01):\n",
    "        self.params,self.lr= model_params,learning_rate\n",
    "        self.vs = []\n",
    "        self.delta = 1e-7\n",
    "        for p in self.params:\n",
    "            v = np.zeros_like(p)\n",
    "            self.vs.append(v)       \n",
    "                 \n",
    "    def step(self,grads): \n",
    "        for i in range(len(self.params)):  \n",
    "            grad = grads[i]\n",
    "            self.vs[i] += grad**2\n",
    "            self.params[i] -= self.lr* grad /(self.delta + np.sqrt(self.vs[i]))\n",
    "            \n",
    "    def scale_learning_rate(self,scale):\n",
    "        self.lr *= scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_train_epoch(params,data_iter,optimizer,iterations,loss_function,print_n=100):\n",
    "    Wx, Wh,bh, Wf,bf = params    \n",
    "    losses = []      \n",
    "    iter = 0\n",
    "  \n",
    "    hidden_size = Wh.shape[0]\n",
    "   \n",
    "    for Xs,Ys,start in data_iter:      \n",
    "        \n",
    "        batch_size = Xs[0].shape[0]\n",
    "        if start:\n",
    "            H = rnn_hidden_state_init(batch_size,hidden_size) \n",
    "        \n",
    "        Zs,Hs = rnn_forward(params,Xs,H)       \n",
    "        loss,dzs = loss_function(Zs,Ys)   \n",
    "       \n",
    "        if False:\n",
    "            print(\"Z.shape\",Zs[0].shape)\n",
    "            print(\"Y.shape\",Ys[0].shape)\n",
    "            print(\"H\",H.shape)\n",
    "\n",
    "        dWx, dWh, dbh,dWf, dbf = rnn_backward(params,Xs,Hs,dzs) \n",
    "        \n",
    "        H = Hs[len(Hs)-2]    #最后时刻的隐状态向量  \n",
    "        \n",
    "        \n",
    "        grads = [dWx, dWh, dbh,dWf, dbf]\n",
    "        optimizer.step(grads)\n",
    "        losses.append(loss)\n",
    "        \n",
    "        if iter % print_n == 0: \n",
    "            print ('iter %d, loss: %f' % (iter, loss)) \n",
    "        iter+=1\n",
    "        \n",
    "        if iter>iterations:break\n",
    "    return losses,H"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.4.7  序列数据的采样\n",
    "\n",
    "设$data$是原始序列数据，采样的所有序列样本长度都是T，下面的迭代器函数采用顺序采样的产生序列样本，即依次产生的序列样本是首尾相接的："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def seg_data_iter_consecutive_one(data,T,start_range=0,repeat = False): \n",
    "    n = len(data)\n",
    "    if start_range>0:\n",
    "        start = np.random.randint(0, start_range)\n",
    "    else:\n",
    "        start = 0    \n",
    "    end = n-T\n",
    "    while True:       \n",
    "        for p in range(start,end,T):\n",
    "            #选取一个训练样本             \n",
    "            X = data[p:p+T]\n",
    "            Y = data[p+1:p+T+1] #[:,-1]            \n",
    "            #inputs = np.expand_dims(inputs, axis=1)\n",
    "            #targets  = targets.reshape(-1,1)\n",
    "            if p==start:\n",
    "                yield X,Y,True\n",
    "            else: \n",
    "                yield X,Y,False\n",
    "        if not repeat:\n",
    "            return"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试一下这个函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n",
    "data_it = seg_data_iter_consecutive_one(data,3,5)\n",
    "\n",
    "for X,Y,_ in data_it:\n",
    "    print(X,Y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "随机采样不需要保证依次采样的2个序列样本首尾相接，其实现更加简单，"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import random\n",
    "def seg_data_iter_random_one(data,T,repeat = False):\n",
    "    while True:  \n",
    "        end = len(data)-T\n",
    "        indices = list(range(0, end))\n",
    "        random.shuffle(indices)\n",
    "        for i in range(end):\n",
    "            p = indices[i]\n",
    "            X = data[p:p+T]\n",
    "            Y = data[p+1:p+T+1] \n",
    "            yield X,Y \n",
    "        if not repeat:\n",
    "            return "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "调用上面的随机采样函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_it = seg_data_iter_random_one(data,3)\n",
    "i=0\n",
    "for X,Y in data_it:\n",
    "    print(X,Y)  \n",
    "    i+=1\n",
    "    if i==3: break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "对于随机采样，只要保证每批序列样本的开始位置不同就可以了，"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import random\n",
    "def seg_data_iter_random(data,T,batch_size,repeat = False):\n",
    "    while True:  \n",
    "        end = len(data)-T\n",
    "        indices = list(range(0, end))\n",
    "        random.shuffle(indices)\n",
    "        for i in range(0,end,batch_size):\n",
    "            batch_indices = indices[i:(i+batch_size)]\n",
    "            X = [data[p:p+T] for p in batch_indices]\n",
    "            Y = [data[p+1:p+T+1] for p in batch_indices]\n",
    "            yield X,Y \n",
    "        if not repeat:\n",
    "            return "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "同样，测试一下这个函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_it = seg_data_iter_random(data,3,2)\n",
    "i=0\n",
    "for X,Y in data_it:\n",
    "    print(\"X:\",X)\n",
    "    print(\"Y:\",Y)\n",
    "    i+=1\n",
    "    if i==3: break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "顺序采样需要保证每一批的对应样本之间是首尾相接的。\n",
    "\n",
    "一种简单的解决方式是将原始数据划分成batch_size个子部分，在每个子部分采用顺序采样的方法采样一个序列样本，就自然保证batch_size个序列样本是首尾相接的，并且每批的不同样本来自不同的部分。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size  = 2\n",
    "data= np.array(data)\n",
    "data = data.reshape(batch_size,-1)\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "但每个序列样本除输入外，还应该包含作为目标的序列，而目标序列正好比输入序列往后错开一个位置，因此，可以用下列代码产生`2*batch_size`个子块："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = np.array(range(20))\n",
    "print(data)\n",
    "batch_size = 2\n",
    "block_len = (len(data)-1)//2\n",
    "print(block_len)\n",
    "data_x = data[0:block_len*batch_size]  \n",
    "data_x = data_x.reshape(batch_size,-1)\n",
    "print(data_x)\n",
    "\n",
    "data_y = data[1:1+block_len*batch_size]  \n",
    "data_y = data_y.reshape(batch_size,-1)\n",
    "print(data_y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "现在，可以从data_x和data_y的第1行各取出一个序列分别作为输入序列和目标序列：`x1=[0,1,2],y1 =[1,2,3] `，再从它们的第2行各取出序列样本`x2 = [10,11,12],y2 = [11,12,13]`作为第2个样本，构成了第1批序列样本。\n",
    "```\n",
    "x1 = [0,1,2],   y1 = [1,2,3],  \n",
    "x2 = [10,11,12],y2 = [11,12,13]]\n",
    "```\n",
    "下面的批顺序采样函数rnn_data_iter_consecutive():"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rnn_data_iter_consecutive(data, batch_size, seq_len,start_range=10):\n",
    "    #每次在data[start:]里采样，使得每一个epoch的训练样本不同\n",
    "    start = np.random.randint(0, start_range)    \n",
    "    block_len = (len(data)-start-1) // batch_size #每块的长度block_len\n",
    "  \n",
    "    Xs = data[start:start+block_len*batch_size]   \n",
    "    Xs = Xs.reshape(batch_size,-1)\n",
    "    Ys = data[start+1:start+block_len*batch_size+1]      \n",
    "    Ys = Ys.reshape(batch_size,-1)\n",
    "    \n",
    "    #在每个块里采样长度为seq_len的样本序列\n",
    "    num_batches = Xs.shape[1] // seq_len           #多少批样本\n",
    "    end_pos = num_batches * seq_len\n",
    "    for i in range(0, end_pos, seq_len): #采样一批样本\n",
    "        X = Xs[:,i:(i+seq_len)]\n",
    "        Y = Ys[:,i:(i+seq_len)]\n",
    "        yield X, Y "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试一下上述函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = list(range(20))\n",
    "print(data[:20])\n",
    "data_it = rnn_data_iter_consecutive(np.array(data[:20]),2,3,1)\n",
    "\n",
    "for X,Y in data_it:\n",
    "    print(\"X:\",X)\n",
    "    print(\"Y:\",Y)   "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "上面采样的批序列样本的每个X是一个二维张量，第一轴是批大小，第二轴是序列长度。而前面的循环神经网络假设序列样本的第一轴是序列长度而不是批大小，可以交换序列长度和批大小对应的轴。\n",
    "\n",
    "上述的X假设每个数据元素是长度为1的标量，但实际问题中，每个数据是包含多个特征的向量甚至是多维张量（如图像），如果每个数据元素是多个特征的向量，则X就是一个三维张量。因此，可将上述的二维张量的序列样本X转化为三维张量："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x1 = np.swapaxes(X,0,1)\n",
    "x1 = x1.reshape(x1.shape[0],x1.shape[1],-1)\n",
    "print(x1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以改写一下上述的函数，增加一个to_3D参数决定是否要转化为3D张量："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def rnn_data_iter_consecutive(data, batch_size, seq_len,start_range=10,to_3D = True):\n",
    "    #每次在data[offset:]里采样，使得每一个epoch的训练样本不同\n",
    "    start = np.random.randint(0, start_range)     \n",
    "    block_len = (len(data)-start-1) // batch_size\n",
    "  \n",
    "    Xs = data[start:start+block_len*batch_size]\n",
    "    Ys = data[start+1:start+block_len*batch_size+1]   \n",
    "    Xs = Xs.reshape(batch_size,-1)\n",
    "    Ys = Ys.reshape(batch_size,-1)\n",
    "    \n",
    "    #在每个块里可以i采样多少个长度为seq_len的样本序列\n",
    "    reset = True\n",
    "    num_batches = Xs.shape[1] // seq_len\n",
    "    for i in range(0, num_batches * seq_len, seq_len):\n",
    "        X = Xs[:,i:(i+seq_len)]\n",
    "        Y = Ys[:,i:(i+seq_len)]\n",
    "        if to_3D:\n",
    "            X = np.swapaxes(X,0,1)\n",
    "            X = X.reshape(X.shape[0],X.shape[1],-1)\n",
    "            #X = np.expand_dims(X, axis=2)\n",
    "            Y = np.swapaxes(Y,0,1)\n",
    "            Y = Y.reshape(Y.shape[0],Y.shape[1],-1)\n",
    "        else:\n",
    "            X = np.swapaxes(X,0,1)\n",
    "            Y = np.swapaxes(Y,0,1)            \n",
    "        if reset: \n",
    "            reset = False\n",
    "            yield X, Y,True \n",
    "        else: yield X, Y,False "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "其中，数据迭代器生成样本(Xs,Ys)并返回一个是否要重置RNN隐状态的标志。如果该标志是True，则重置RNN隐状态H。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = np.array(list(range(20))).reshape(-1,1)\n",
    "data_it = rnn_data_iter_consecutive(data,2,3,2)\n",
    "i = 0\n",
    "for X,Y,_ in data_it:\n",
    "    print(\"X:\",X)\n",
    "    print(\"Y:\",Y) \n",
    "    i+=1\n",
    "    if i==2 :break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.4.8 序列数据的RNN训练和预测\n",
    "\n",
    "下面的用前面的采样自曲线的y值的实数训练RNN模型："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "T = 5000  # Generate a total of 1000 points\n",
    "time = np.arange(0, T)\n",
    "data = np.sin(time*0.1)+np.cos(time*0.2)\n",
    "print(data.shape)\n",
    "\n",
    "batch_size = 3\n",
    "input_dim = 1\n",
    "output_dim= 1\n",
    "hidden_size=100\n",
    "seq_length = 50\n",
    "params = rnn_params_init(input_dim, hidden_size,output_dim)\n",
    "H = rnn_hidden_state_init(batch_size,hidden_size)\n",
    "\n",
    "data_it = rnn_data_iter_consecutive(data,batch_size,seq_length,2)\n",
    "x,y,_ = next(data_it)\n",
    "print(\"X:\",x.shape,\"Y:\",y.shape,\"H:\",H.shape)\n",
    "\n",
    "loss_function = lambda F,Y:rnn_loss_grad(F,Y,util.mse_loss_grad,False)\n",
    "\n",
    "Zs,Hs = rnn_forward(params,x,H)\n",
    "print(\"Z:\",Zs[0].shape,\"H:\",Hs[0].shape)\n",
    "loss,dzs = loss_function(Zs,y)   \n",
    "print(dzs[0].shape)\n",
    "\n",
    "epoches = 10\n",
    "learning_rate = 5e-4\n",
    "\n",
    "iterations  =200\n",
    "losses = []\n",
    "\n",
    "#optimizer = AdaGrad(params,learning_rate)\n",
    "momentum = 0.9\n",
    "optimizer = SGD(params,learning_rate,momentum)\n",
    "\n",
    "for epoch in range(epoches):\n",
    "    data_it = rnn_data_iter_consecutive(data,batch_size,seq_length,100)\n",
    "   # epoch_losses,param,H = rnn_train(params,data_it,learning_rate,iterations,loss_function,print_n=100)\n",
    "    epoch_losses,H = rnn_train_epoch(params,data_it,optimizer,iterations,loss_function,print_n=50)\n",
    "     #losses.extend(epoch_losses)  \n",
    "    epoch_losses = np.array(epoch_losses).mean()   \n",
    "    losses.append(epoch_losses) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "绘制训练损失曲线"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "plt.plot(losses)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 预测\n",
    "\n",
    "下面的代码用训练的RNN模型从某个时刻的数据预测后面500个时刻的输出："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "H = rnn_hidden_state_init(1,hidden_size)   \n",
    "\n",
    "start = 3\n",
    "x = data[start:start+1].copy()\n",
    "x =x.reshape(x.shape[0],1,-1)\n",
    "print(x.shape)\n",
    "x = x.reshape(1,-1)\n",
    "ys =[]\n",
    "print(x.flatten())\n",
    "for i in range(500):\n",
    "    F,H= rnn_forward_step(params,x,H) \n",
    "    x=F\n",
    "    ys.append(F[0,0])    \n",
    "    \n",
    "print(len(ys))   \n",
    "ys  = ys[:]\n",
    "plt.plot(ys[:500])\n",
    "plt.plot(data[start+1:start+1+500])\n",
    "plt.xlabel(\"time\")\n",
    "plt.ylabel(\"value\")\n",
    "plt.legend(['y','y_real'])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "这个预测不是很准确，如果只从当前时刻预测其下一时刻的数据，即从`data[t]`预测`data[t+1]`。下面的代码采用这种短期预测方式从`data[start,start+500]`中的每个时刻的数据取预测下一时刻的数据，即预测`data[start+1,start+1+500]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "H = rnn_hidden_state_init(1,hidden_size)   \n",
    "\n",
    "start = 3\n",
    "ys =[]\n",
    "for i in range(500):\n",
    "    x= data[start+i:start+i+1].copy()\n",
    "    x = x.reshape(1,-1)\n",
    "    F,H= rnn_forward_step(params,x,H)\n",
    "    ys.append(F[0,0])\n",
    " \n",
    "    \n",
    "ys  = ys[:]\n",
    "plt.plot(ys[:500])\n",
    "plt.plot(data[start+1:start+501])\n",
    "plt.xlabel(\"time\")\n",
    "plt.ylabel(\"value\")\n",
    "plt.legend(['y','y_real'])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "短期预测下一时刻的结果和真实数据完全重合，说明短期预测很好。 上述RNN的相关代码在本书代码的`rnn.py`文件中。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 股票数据的训练和预测\n",
    "\n",
    "对于股票数据，可以只用股票的收盘价作为序列数据进行股票收盘价的预测，下面代码将股票的收盘价数据作为自回归数据："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = read_stock('sp500.csv')\n",
    "data = np.array(data.iloc[:,-2]).reshape(-1,1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面代码用股票的所有指标（开盘价、最高价、最低价、收盘价、交易量）对股票收盘价进行预测，首先同样训练RNN模型："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "data = read_stock('sp500.csv')\n",
    "\n",
    "stock_data = np.array(data)\n",
    "print(\"stock_data.shape\",stock_data.shape)\n",
    "print(\"stock_data[:3]\\n\",stock_data[:3])\n",
    "\n",
    "def stock_data_iter(data,seq_length):\n",
    "    feature_n = data.shape[1]\n",
    "    num = (len(data)-1)//seq_length    \n",
    "    while True:       \n",
    "        for i in range(num):\n",
    "            #选取一个训练样本 \n",
    "            p = i*seq_length\n",
    "            inputs = data[p:p+seq_length]\n",
    "            targets = data[p+1:p+seq_length+1][:,-2]  \n",
    "            inputs = np.expand_dims(inputs, axis=1)\n",
    "            targets  = targets.reshape(-1,1)\n",
    "            if i==0:\n",
    "                yield inputs,targets,True\n",
    "            else: \n",
    "                yield inputs,targets,False\n",
    "\n",
    "\n",
    "batch_size = 1 \n",
    "input_dim= stock_data.shape[1]\n",
    "hidden_dim = 100\n",
    "output_dim=1\n",
    "params = rnn_params_init(input_dim, hidden_dim,output_dim)\n",
    "H = rnn_hidden_state_init(batch_size,hidden_dim)\n",
    "\n",
    "seq_length = 100 # number of steps to unroll the RNN for\n",
    "\n",
    "data_it = stock_data_iter(stock_data, seq_length)\n",
    "X,Y,_ = next(data_it)\n",
    "print(X.shape,Y.shape)\n",
    "\n",
    "loss_function = lambda F,Y:rnn_loss_grad(F,Y,util.mse_loss_grad,False)\n",
    "\n",
    "# hyperparameters\n",
    "epoches = 2\n",
    "learning_rate = 1e-4\n",
    "iterations  =2000\n",
    "losses = []\n",
    "\n",
    "#optimizer = AdaGrad(params,learning_rate)\n",
    "momentum = 0.9\n",
    "optimizer = SGD(params,learning_rate,momentum)\n",
    "\n",
    "for epoch in range(epoches):\n",
    "    data_it =  stock_data_iter(stock_data, seq_length)\n",
    "   # epoch_losses,param,H = rnn_train(params,data_it,learning_rate,iterations,loss_function,print_n=100)\n",
    "    epoch_losses,H = rnn_train_epoch(params,data_it,optimizer,iterations,loss_function,print_n=200)\n",
    "    losses.extend(epoch_losses)  \n",
    "    #epoch_losses = np.array(epoch_losses).mean()   \n",
    "    #losses.append(epoch_losses)  \n",
    "plt.plot(losses)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "上述的序列数据不是自回归数据，每个时刻股票数据是多个特征构成的向量，而预测的下一时刻的股票价格是一个数值，即输入是多个值的向量而输出是一个值的数据，根据该模型无法进行长期预测。下面代码是根据训练的RNN进行的短期预测："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "H = rnn_hidden_state_init(1,hidden_dim)   \n",
    "\n",
    "start = 3\n",
    "data = stock_data[start:,:]\n",
    "\n",
    "ys =[]\n",
    "for i in range(len(data)):\n",
    "    x= data[i,:].copy()\n",
    "    x = x.reshape(1,-1)\n",
    "    f,H = rnn_forward_step(params,x,H)     \n",
    "    ys.append(f[0,0])\n",
    "\n",
    "ys  = ys[:]\n",
    "plt.plot(ys[:500])\n",
    "plt.plot(data[:500,-2])\n",
    "plt.xlabel(\"time\")\n",
    "plt.ylabel(\"value\")\n",
    "plt.legend(['y','y_real'])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.5 RNN语言模型和文本生成\n",
    "### 7.5.1 字符表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = 'input.txt'\n",
    "data = open(filename, 'r').read()\n",
    "chars = list(set(data))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "输出文本中所有字符的数目和字符表的长度，字符表前10个字符和文本的前148个字符："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_size, vocab_size = len(data), len(chars)\n",
    "print ('总字符个数 %d,字符表的长度 %d unique.' % (data_size, vocab_size))\n",
    "print('字符表的前10个字符：\\n',chars[:10])\n",
    "print('前148个字符：\\n',data[:148])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "字符表中的每个字符对应一个下标，可以用2个字典表示字符到下标、下标到字符的映射关系："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "char_to_idx = { ch:i for i,ch in enumerate(chars) }\n",
    "idx_to_char = { i:ch for i,ch in enumerate(chars) }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "函数one_hot_idx根据字符（单词）表大小vocab_size和一个字符在字符（单词）表的下标idx将该字符转化为一个one-hot向量："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def one_hot_idx(idx,vocab_size):  \n",
    "    x = np.zeros((1,vocab_size)) \n",
    "    x[0,idx] = 1  \n",
    "    return x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.5.2  字符序列样本的采样\n",
    "\n",
    "下面代码是采用顺序采样方式采样字符序列样本："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def character_seq_data_iter_consecutive(data, batch_size, seq_len,start_range=10):\n",
    "      #每次在data[offset:]里采样，使得每一个epoch的训练样本不同\n",
    "    start = np.random.randint(0, start_range)     \n",
    "    block_len = (len(data)-start-1) // batch_size\n",
    "    num_batches = block_len // seq_len  #每块里最多能连续采样的批数\n",
    "    bs = np.array(range(0,block_len*batch_size,block_len) ) #每个block起始位置\n",
    "    \n",
    "    i_end = num_batches * seq_len\n",
    "    for i in range(0, i_end, seq_len): #一个block的序列开始位置\n",
    "        s = start+i              #在一个block里的位置\n",
    "        X = np.empty((seq_len,batch_size),dtype=object)#,dtype = np.int32)\n",
    "        Y = np.empty((seq_len,batch_size),dtype=object)#,dtype = np.int32) \n",
    "        for b in range(batch_size): #b表示一个批样本的第几个样本\n",
    "            s_b = s+bs[b]\n",
    "            for t in range(seq_len):\n",
    "                X[t,b] = data[s_b]\n",
    "                Y[t,b] = data[s_b+1]\n",
    "                s_b +=1 \n",
    "        if i==0:\n",
    "            yield X,Y,True\n",
    "        else:\n",
    "            yield X,Y,False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试一下这个函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = 'Li,where are you from'\n",
    "batch_size = 2\n",
    "seq_length  = 3\n",
    "data_it = character_seq_data_iter_consecutive(x,batch_size,seq_length,1)\n",
    "\n",
    "i = 0 \n",
    "for x,y,_ in data_it:\n",
    "    print(\"x:\",x)\n",
    "    print(\"y\",y)\n",
    "    i+=1\n",
    "    if i==2:break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "函数返回的字符需要进一步将它们向量化，如将每个字符转化为one-hot向量形式，为此，修改上述的函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def character_seq_data_iter_consecutive(data, batch_size, seq_len,vocab_size,start_range=10):\n",
    "      #每次在data[offset:]里采样，使得每一个epoch的训练样本不同\n",
    "    start = np.random.randint(0, start_range)     \n",
    "    block_len = (len(data)-start-1) // batch_size\n",
    "    num_batches = block_len // seq_len  #每块里最多能连续采样的批数\n",
    "    bs = np.array(range(0,block_len*batch_size,block_len) )\n",
    "\n",
    "    i_end = num_batches * seq_len\n",
    "    for i in range(0, i_end, seq_len):\n",
    "        s = start+i\n",
    "        X = np.empty((seq_len,batch_size,vocab_size),dtype = np.int32)\n",
    "        Y = np.empty((seq_len,batch_size,1),dtype = np.int32) \n",
    "        for b in range(batch_size):\n",
    "            s_b = s+bs[b]\n",
    "            for t in range(seq_len):\n",
    "                X[t,b,:] = one_hot_idx(char_to_idx[data[s_b]],vocab_size)\n",
    "                Y[t,b,:] = char_to_idx[data[s_b+1]]\n",
    "                s_b +=1 \n",
    "        if i==0:\n",
    "            yield X,Y,True\n",
    "        else:\n",
    "            yield X,Y,False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "同样，测试一下这个函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = 'Li,where are you from'\n",
    "batch_size = 2\n",
    "seq_length  = 3\n",
    "data_it = character_seq_data_iter_consecutive(x,batch_size,seq_length,vocab_size,1)\n",
    "i = 0 \n",
    "for x,y,_ in data_it:\n",
    "    print(\"x:\",x)\n",
    "    print(\"y\",y)\n",
    "    i+=1\n",
    "    if i==2:break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.5.3 RNN模型的训练和预测\n",
    "\n",
    "初始化一个RNN模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 1\n",
    "input_dim = vocab_size\n",
    "output_dim= vocab_size\n",
    "hidden_size=100\n",
    "params = rnn_params_init(input_dim, hidden_size,output_dim)\n",
    "H = rnn_hidden_state_init(batch_size,hidden_size)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 预测\n",
    "\n",
    "根据输出字符序列prefix生成其后面的一系列字符"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_rnn(params,prefix,n): \n",
    "    Wx, Wh,bh, Wf,bf =  params\n",
    "    #Wxh, Whh,Why, bh, by =params[\"Wxh\"],params[\"Whh\"],params[\"Why\"],params[\"bh\"],params[\"by\"]\n",
    "    vocab_size,hidden_size = Wx.shape[0],Wh.shape[1]\n",
    "    h = rnn_hidden_state_init(1,hidden_size) \n",
    "    \n",
    "    output = [char_to_idx[prefix[0]]]\n",
    "   \n",
    "    for t in range(len(prefix) +n - 1):        \n",
    "        # 将上⼀时间步的输出作为当前时间步的输⼊。\n",
    "        x = one_hot_idx(output[-1], vocab_size)\n",
    "        \n",
    "        z,h = rnn_forward_step(params,x,h) \n",
    "        \n",
    "        #h = np.tanh(np.dot(x,Wx) + np.dot(h,Wh) + bh)\n",
    "        #z = np.dot(h,Wf) + bf   \n",
    "           \n",
    "        \n",
    "        if t < len(prefix) - 1:\n",
    "            output.append(char_to_idx[prefix[t + 1]])\n",
    "        else:\n",
    "            p = np.exp(z) / np.sum(np.exp(z)) \n",
    "           # idx = int(p.argmax(axis=1))\n",
    "            idx = np.random.choice(range(vocab_size), p=p.ravel())\n",
    "            output.append(idx)\n",
    "     \n",
    "        \n",
    "    return ''.join([idx_to_char[i] for i in output])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试这个预测函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "str = predict_rnn(params,\"he\",200)\n",
    "print(str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "由于初始的RNN模型参数是随机的，预测也是随机的.\n",
    "\n",
    "可以用一个文本语料库采样的序列样本去训练RNN模型，如下面的代码："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "batch_size = 3\n",
    "input_dim = vocab_size\n",
    "output_dim= vocab_size\n",
    "hidden_size=100\n",
    "params = rnn_params_init(input_dim, hidden_size,output_dim)\n",
    "H = rnn_hidden_state_init(batch_size,hidden_size)\n",
    "seq_length = 25\n",
    "\n",
    "loss_function = lambda F,Y:rnn_loss_grad(F,Y) #,util.loss_grad_least)\n",
    "\n",
    "\n",
    "epoches = 3\n",
    "learning_rate = 1e-2\n",
    "iterations  =10000\n",
    "losses = []\n",
    "\n",
    "optimizer = AdaGrad(params,learning_rate)\n",
    "momentum = 0.9\n",
    "optimizer = SGD(params,learning_rate,momentum)\n",
    "\n",
    "for epoch in range(epoches):\n",
    "    data_it =  character_seq_data_iter_consecutive(data,batch_size,seq_length,vocab_size,100)\n",
    "   # epoch_losses,param,H = rnn_train(params,data_it,learning_rate,iterations,loss_function,print_n=100)\n",
    "    epoch_losses,H = rnn_train_epoch(params,data_it,optimizer,iterations,loss_function,print_n=10)\n",
    "    losses.extend(epoch_losses)  \n",
    "    #epoch_losses = np.array(epoch_losses).mean()   \n",
    "    #losses.append(epoch_losses)  \n",
    "plt.plot(losses[:])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "用训练后的RNN再次进行预测："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "str = predict_rnn(params,\"he\",200)\n",
    "print(str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.7.3  LSTM的代码实现\n",
    "\n",
    "初始化这些模型参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def lstm_params_init(input_dim,hidden_dim,output_dim,scale=0.01):\n",
    "    normal = lambda m,n : np.random.randn(m, n)*scale\n",
    "    two = lambda : (normal(input_dim+hidden_dim, hidden_dim),np.zeros((1,hidden_dim)))\n",
    "    \n",
    "    Wi, bi = two()  # Input gate parameters\n",
    "    Wf, bf = two()  # Forget gate parameters\n",
    "    Wo, bo = two()  # Output gate parameters\n",
    "    Wc, bc = two()  # Candidate cell parameters\n",
    "    \n",
    "    Wy = normal(hidden_dim, output_dim)\n",
    "    by = np.zeros((1,output_dim))\n",
    "   \n",
    "    params = [Wi, bi,Wf, bf, Wo,bo, Wc,bc,Wy,by]\n",
    "    return params"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "初始化元胞状态$C_t$和隐状态$h_t$："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lstm_state_init(batch_size, hidden_size):\n",
    "    return (np.zeros((batch_size, hidden_size)),\n",
    "            np.zeros((batch_size, hidden_size)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "正向计算（前向传播）："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sigmoid(x):\n",
    "    return 1 / (1 + np.exp(-x))\n",
    "\n",
    "def lstm_forward(params,Xs, state):\n",
    "    [Wi, bi,Wf, bf, Wo,bo,Wc,bc,Wy,by] = params   \n",
    "    \n",
    "    (H, C) = state         #初始状态\n",
    "    Hs = {}\n",
    "    Cs = {}\n",
    "    Zs = []\n",
    "    \n",
    "    Hs[-1] = np.copy(H)\n",
    "    Cs[-1] = np.copy(C)\n",
    "    \n",
    "    Is = []\n",
    "    Fs = []\n",
    "    Os = []\n",
    "    C_tildas = []\n",
    "    \n",
    "    for t in range(len(Xs)): \n",
    "        X = Xs[t]\n",
    "        XH = np.column_stack((X, H))\n",
    "        if False:\n",
    "            print(\"XH.shape\",XH.shape)\n",
    "            print(\"Wi.shape\",Wi.shape)\n",
    "            break\n",
    "        I = sigmoid(np.dot(XH, Wi)+bi)\n",
    "        F = sigmoid(np.dot(XH, Wf)+bf)\n",
    "        O = sigmoid(np.dot(XH, Wo)+bo)\n",
    "        C_tilda = np.tanh(np.dot(XH, Wc)+bc)\n",
    "    \n",
    "        C = F * C + I * C_tilda\n",
    "        H = O*np.tanh(C)       #O * C.tanh()  #输出状态 \n",
    "        \n",
    "        Y = np.dot(H, Wy) + by        # 输出\n",
    "        \n",
    "        Zs.append(Y)\n",
    "        Hs[t] = H\n",
    "        Cs[t] = C\n",
    "        \n",
    "        Is.append(I)\n",
    "        Fs.append(F)\n",
    "        Os.append(O)\n",
    "        C_tildas.append(C_tilda)\n",
    "    return Zs,Hs,Cs,(Is,Fs,Os,C_tildas)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "同样，也可以将其中某个时刻的正向计算单独作为一个函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lstm_forward_step(params,X,H,C):\n",
    "    [Wi, bi,Wf, bf, Wo,bo,Wc,bc,Wy,by] = params\n",
    "    \n",
    "    XH = np.column_stack((X, H))\n",
    "    I = sigmoid(np.dot(XH, Wi)+bi)\n",
    "    F = sigmoid(np.dot(XH, Wf)+bf)\n",
    "    O = sigmoid(np.dot(XH, Wo)+bo)\n",
    "    C_tilda = np.tanh(np.dot(XH, Wc)+bc)\n",
    "    \n",
    "    C = F * C + I * C_tilda\n",
    "    H = O*np.tanh(C)       #O * tanh(C)  #输出状态 \n",
    "    Y = np.dot(H, Wy) + by        # 输出\n",
    "    \n",
    "    return Y,H,C,(I,F,O,C_tilda)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "反向求导："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "def dsigmoid(x):\n",
    "    return sigmoid(x) * (1 - sigmoid(x))\n",
    "\n",
    "def dtanh(x):\n",
    "    return 1 - np.tanh(x) * np.tanh(x)\n",
    "\n",
    "def grad_clipping(grads,alpha):\n",
    "    norm = math.sqrt(sum((grad ** 2).sum() for grad in grads))\n",
    "    if norm > alpha:\n",
    "        ratio = alpha / norm\n",
    "        for i in range(len(grads)):\n",
    "            grads[i]*=ratio \n",
    "            \n",
    "def lstm_backward(params,Xs,Hs,Cs,dZs,cache,clip_value = 5.): # Ys,loss_function):\n",
    "    [Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by] = params\n",
    "    \n",
    "    Is,Fs,Os,C_tildas = cache    \n",
    "    \n",
    "    dWi,dWf,dWo,dWc,dWy  = np.zeros_like(Wi), np.zeros_like(Wf), np.zeros_like(Wo), np.zeros_like(Wc), np.zeros_like(Wy)\n",
    "    dbi,dbf,dbo,dbc,dby = np.zeros_like(bi), np.zeros_like(bf),  np.zeros_like(bo), np.zeros_like(bc), np.zeros_like(by)\n",
    "\n",
    "    dH_next = np.zeros_like(Hs[0])\n",
    "    dC_next = np.zeros_like(Cs[0])\n",
    "    \n",
    "    input_dim = Xs[0].shape[1]\n",
    "  \n",
    "    h = Hs\n",
    "    x = Xs\n",
    "    \n",
    "    T = len(Xs)  \n",
    "    for t in reversed(range(T)):     \n",
    "        I = Is[t]\n",
    "        F = Fs[t]\n",
    "        O = Os[t]\n",
    "        C_tilda = C_tildas[t]\n",
    "        H = Hs[t]\n",
    "        X = Xs[t]\n",
    "        C = Cs[t]\n",
    "        H_pre =  Hs[t-1]\n",
    "        C_prev = Cs[t-1]\n",
    "        XH_pre = np.column_stack((X, H_pre))\n",
    "        XH_ = XH_pre\n",
    "        \n",
    "        dZ = dZs[t]  \n",
    "    \n",
    "        #输出f的模型参数的idu\n",
    "        dWy += np.dot(H.T,dZ)      \n",
    "        dby += np.sum(dZ, axis=0, keepdims=True)   \n",
    "        \n",
    "        #隐状态h的梯度\n",
    "        dH = np.dot(dZ, Wy.T) + dH_next      \n",
    "     \n",
    "        dC = dH*O*dtanh(C) +dC_next  # H_t= O_t*tanh(C_t)\n",
    "        \n",
    "        dO = np.tanh(C) *dH     \n",
    "        dOZ = O * (1-O)*dO          #O = sigma(Z_o)            \n",
    "        dWo += np.dot(XH_.T,dOZ)    # Z_o = (X,H_)W_o+b_o\n",
    "        dbo += np.sum(dOZ, axis=0, keepdims=True)              \n",
    "        \n",
    "         #di                         \n",
    "        di =  C_tilda*dC\n",
    "        diZ = I*(1-I) * di\n",
    "        dWi += np.dot(XH_.T,diZ)\n",
    "        dbi += np.sum(diZ, axis=0, keepdims=True)  \n",
    "        \n",
    "        #df\n",
    "        df = C_prev*dC\n",
    "        dfZ = F*(1-F) * df\n",
    "        dWf += np.dot(XH_.T,dfZ)\n",
    "        dbf += np.sum(dfZ, axis=0, keepdims=True)          \n",
    "        \n",
    "        # dC_bar       \n",
    "        dC_tilda = I*dC                         #C = F * C + I * C_tilda        \n",
    "        dC_tilda_Z =(1-np.square(C_tilda))*dC_tilda    # C_tilda = tanh(C_tilda_Z)         \n",
    "        dWc += np.dot(XH_.T,dC_tilda_Z)     # C_tilda_Z = (X,H_)W_c+b_c    \n",
    "        dbc += np.sum(dC_tilda_Z, axis=0, keepdims=True)\n",
    "       \n",
    "        dXH_ = (np.dot(dfZ, Wf.T)\n",
    "             + np.dot(diZ, Wi.T)\n",
    "             + np.dot(dC_tilda_Z, Wc.T)\n",
    "             + np.dot(dOZ, Wo.T))\n",
    "    \n",
    "        dX_prev = dXH_[:, :input_dim]\n",
    "        dH_prev = dXH_[:, input_dim:]\n",
    "        dC_prev = F * dC\n",
    "        \n",
    "        dC_next = dC_prev\n",
    "        dH_next = dH_prev      \n",
    "\n",
    "    grads = [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]\n",
    "    grad_clipping(grads,clip_value)\n",
    "    #for dparam in [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]:\n",
    "    #    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients\n",
    "    return grads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "梯度检验"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "T  = 3\n",
    "input_dim, hidden_dim,output_dim = 4,3,4\n",
    "batch_size = 2        \n",
    "Xs = np.random.randn(T,batch_size,input_dim)\n",
    "Ys = np.random.randint(output_dim, size=(T,batch_size))\n",
    " \n",
    "print(\"Xs\",Xs)\n",
    "print(\"Ys\",Ys)        \n",
    "        \n",
    "# cheack gradient  \n",
    "params = lstm_params_init(input_dim, hidden_dim,output_dim)\n",
    "HC = lstm_state_init(batch_size,hidden_dim)\n",
    "\n",
    "Zs,Hs,Cs,cache = lstm_forward(params,Xs,HC) \n",
    "loss_function = rnn_loss_grad\n",
    "loss,dZs = loss_function(Zs,Ys)  \n",
    "\n",
    "grads = lstm_backward(params,Xs,Hs,Cs,dZs,cache)\n",
    "\n",
    "def rnn_loss():\n",
    "    HC = lstm_state_init(batch_size,hidden_dim)    \n",
    "    Zs,Hs,Cs,cache= lstm_forward(params,Xs,HC) \n",
    "    loss_function = rnn_loss_grad\n",
    "    loss,dZs = loss_function(Zs,Ys)     \n",
    "    return loss\n",
    "\n",
    "numerical_grads = util.numerical_gradient(rnn_loss,params,1e-6) #rnn_numerical_gradient(rnn_loss,params,1e-10)\n",
    "#diff_error = lambda x, y: np.max( np.abs(x - y)/(np.maximum(1e-8, np.abs(x) + np.abs(y))))\n",
    "diff_error = lambda x, y: np.max( np.abs(x - y))\n",
    "                                 \n",
    "def rel_error(x, y):\n",
    "  \"\"\" returns relative error \"\"\"\n",
    "  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))\n",
    "\n",
    "print(\"loss\",loss)\n",
    "print(\"[Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by] \")\n",
    "for i in range(len(grads)):\n",
    "    print(diff_error(grads[i],numerical_grads[i]))\n",
    "\n",
    "print(\"grads\",grads[0])\n",
    "print(\"numerical_grads\",numerical_grads[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "同样，可以定义梯度下降法的一次迭代过程："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lstm_train_epoch(params,data_iter,optimizer,iterations,loss_function,print_n=100):\n",
    "    Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by = params\n",
    "    #Wxh, Whh,Why, bh, by =params[\"Wxh\"],params[\"Whh\"],params[\"Why\"],params[\"bh\"],params[\"by\"]\n",
    "    losses = []      \n",
    "    iter = 0\n",
    "    \n",
    "    batch_size = None\n",
    "    hidden_size = Wy.shape[0]\n",
    "   \n",
    "    for Xs,Ys,start in data_iter:\n",
    "        if not batch_size:\n",
    "            batch_size = Xs[0].shape[0]            \n",
    "        if start:\n",
    "            HC = lstm_state_init(batch_size,hidden_size) \n",
    "        \n",
    "        Zs,Hs,Cs,cache = lstm_forward(params,Xs,HC) \n",
    "        loss,dZs = loss_function(Zs,Ys)  \n",
    "        grads = lstm_backward(params,Xs,Hs,Cs,dZs,cache)\n",
    "      \n",
    "        optimizer.step(grads)\n",
    "        losses.append(loss)\n",
    "        \n",
    "        if iter % print_n == 0: \n",
    "            print ('iter %d, loss: %f' % (iter, loss)) \n",
    "        iter+=1\n",
    "        \n",
    "        if iter>iterations:break\n",
    "    return losses,H"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "文本生成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = 'input.txt'\n",
    "data = open(filename, 'r').read()\n",
    "chars = list(set(data))\n",
    "data_size, vocab_size = len(data), len(chars)\n",
    "print ('总字符个数 %d,字符表的长度 %d unique.' % (data_size, vocab_size))\n",
    "\n",
    "char_to_idx = { ch:i for i,ch in enumerate(chars) }\n",
    "idx_to_char = { i:ch for i,ch in enumerate(chars) }\n",
    "\n",
    "input_dim, hidden_dim,output_dim = vocab_size,100,vocab_size\n",
    "batch_size = 2   \n",
    "\n",
    "params = lstm_params_init(input_dim, hidden_dim,output_dim)\n",
    "H = lstm_state_init(batch_size,hidden_dim)\n",
    "seq_length = 25\n",
    "\n",
    "loss_function = lambda F,Y:rnn_loss_grad(F,Y) #,util.loss_grad_least)\n",
    "\n",
    "epoches = 3\n",
    "learning_rate = 1e-2\n",
    "iterations  =10000\n",
    "losses = []\n",
    "\n",
    "optimizer = AdaGrad(params,learning_rate)\n",
    "momentum = 0.9\n",
    "optimizer = SGD(params,learning_rate,momentum)\n",
    "\n",
    "for epoch in range(epoches):\n",
    "    data_it =  character_seq_data_iter_consecutive(data,batch_size,seq_length,vocab_size,100)\n",
    "   # epoch_losses,param,H = rnn_train(params,data_it,learning_rate,iterations,loss_function,print_n=100)\n",
    "    epoch_losses,H = lstm_train_epoch(params,data_it,optimizer,iterations,loss_function,print_n=10)\n",
    "    losses.extend(epoch_losses)  \n",
    "    #epoch_losses = np.array(epoch_losses).mean()   \n",
    "    #losses.append(epoch_losses)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 预测\n",
    "\n",
    "和rnn类似，可以定义如下的预测函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_lstm(params,prefix,n): \n",
    "    Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by = params\n",
    "    vocab_size,hidden_dim = Wi.shape[0]-Wy.shape[0],Wy.shape[0]\n",
    "    h,c = lstm_state_init(1,hidden_dim) \n",
    "    \n",
    "    output = [char_to_idx[prefix[0]]]\n",
    "   \n",
    "    for t in range(len(prefix) +n - 1):        \n",
    "        # 将上⼀时间步的输出作为当前时间步的输⼊。\n",
    "        x = one_hot_idx(output[-1], vocab_size)\n",
    "        \n",
    "        z,h,c,_ = lstm_forward_step(params,x,h,c)\n",
    "              \n",
    "        if t < len(prefix) - 1:\n",
    "            output.append(char_to_idx[prefix[t + 1]])\n",
    "        else:\n",
    "            p = np.exp(z) / np.sum(np.exp(z)) \n",
    "           # idx = int(p.argmax(axis=1))\n",
    "            idx = np.random.choice(range(vocab_size), p=p.ravel())\n",
    "            output.append(idx)     \n",
    "        \n",
    "    return ''.join([idx_to_char[i] for i in output]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "str = predict_lstm(params,\"he\",200)\n",
    "print(str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以用一个类LSTMCell来表示包裹上述的LSTM相关的函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def grad_clipping(grads,alpha):\n",
    "    norm = math.sqrt(sum((grad ** 2).sum() for grad in grads))\n",
    "    if norm > alpha:\n",
    "        ratio = alpha / norm\n",
    "        for i in range(len(grads)):\n",
    "            grads[i]*=ratio \n",
    "            \n",
    "class LSTM_cell(object):\n",
    "    def __init__(self,input_dim,hidden_dim,output_dim,scale=0.01):\n",
    "        #super(LSTM_cell, self).__init__()\n",
    "        self.input_dim,self.hidden_dim,self.output_dim = input_dim,hidden_dim,output_dim\n",
    "        normal = lambda m,n : np.random.randn(m, n)*scale\n",
    "        two = lambda : (normal(input_dim+hidden_dim, hidden_dim),np.zeros((1,hidden_dim)))\n",
    "\n",
    "        Wi, bi = two()  # Input gate parameters\n",
    "        Wf, bf = two()  # Forget gate parameters\n",
    "        Wo, bo = two()  # Output gate parameters\n",
    "        Wc, bc = two()  # Candidate cell parameters\n",
    "\n",
    "        Wy = normal(hidden_dim, output_dim)\n",
    "        by = np.zeros((1,output_dim))\n",
    "\n",
    "        #params = [Wi, bi,Wf, bf, Wo,bo, Wc,bc,Wy,by]\n",
    "        #  return params\n",
    "        self.params = [Wi, bi,Wf, bf, Wo,bo, Wc,bc,Wy,by]\n",
    "        self.grads = [np.zeros_like(param) for param in self.params]\n",
    "        self.H,self,C = None,None\n",
    "      \n",
    "    def reset_state(selfmbatch_size):\n",
    "        self.H,self.C = (np.zeros((batch_size, self.hidden_dim)),\n",
    "            np.zeros((batch_size, self.hidden_dim)))\n",
    "\n",
    "    def forward(self,Xs):\n",
    "        [Wi, bi,Wf, bf, Wo,bo,Wc,bc,Wy,by] = self.params   \n",
    "\n",
    "        if not self.H or self.C:\n",
    "            reset_state(Xs[0].shape[0])        \n",
    "        H, C =  self.H,self.C\n",
    "        Hs = {}\n",
    "        Cs = {}\n",
    "        Zs = []\n",
    "\n",
    "        Hs[-1] = np.copy(H)\n",
    "        Cs[-1] = np.copy(C)\n",
    "\n",
    "        Is = []\n",
    "        Fs = []\n",
    "        Os = []\n",
    "        C_tildas = []\n",
    "\n",
    "        for t in range(len(Xs)): \n",
    "            X = Xs[t]\n",
    "            XH = np.column_stack((X, H))\n",
    "            #print(\"XH.shape\",XH.shape)\n",
    "            #print(\"Wi.shape\",Wi.shape)\n",
    "            #break\n",
    "            I = sigmoid(np.dot(XH, Wi)+bi)\n",
    "            F = sigmoid(np.dot(XH, Wf)+bf)\n",
    "            O = sigmoid(np.dot(XH, Wo)+bo)\n",
    "            C_tilda = np.tanh(np.dot(XH, Wc)+bc)\n",
    "\n",
    "            C = F * C + I * C_tilda\n",
    "            H = O*np.tanh(C)       #O * C.tanh()  #输出状态 \n",
    "\n",
    "            Y = np.dot(H, Wy) + by        # 输出\n",
    "\n",
    "            Zs.append(Y)\n",
    "            Hs[t] = H\n",
    "            Cs[t] = C\n",
    "\n",
    "            Is.append(I)\n",
    "            Fs.append(F)\n",
    "            Os.append(O)\n",
    "            C_tildas.append(C_tilda)\n",
    "        self.Zs,self.Hs,self.Cs,self.Is,self.Fs,self.Os,self.C_tildas =  Zs,Hs,Cs,Is,Fs,Os,C_tildas\n",
    "        self.Xs  =Xs\n",
    "        \n",
    "        #return Zs,Hs,Cs,(Is,Fs,Os,C_tildas)\n",
    "        return Zs,Hs\n",
    "\n",
    "    def backward(self): # Ys,loss_function):\n",
    "        [Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by] = self.params\n",
    "\n",
    "        self.Hs,self.Cs,self.Is,self.Fs,self.Os,self.C_tildas = Hs,Cs,Is,Fs,Os,C_tildas\n",
    "        Xs = self.Xs\n",
    "\n",
    "        dWi,dWf,dWo,dWc,dWy  = np.zeros_like(Wi), np.zeros_like(Wf), np.zeros_like(Wo), np.zeros_like(Wc), np.zeros_like(Wy)\n",
    "        dbi,dbf,dbo,dbc,dby = np.zeros_like(bi), np.zeros_like(bf),  np.zeros_like(bo), np.zeros_like(bc), np.zeros_like(by)\n",
    "\n",
    "        dH_next = np.zeros_like(Hs[0])\n",
    "        dC_next = np.zeros_like(Cs[0])\n",
    "\n",
    "        input_dim = Xs[0].shape[1]\n",
    "\n",
    "        h = Hs\n",
    "        x = Xs\n",
    "\n",
    "        T = len(Xs)  \n",
    "        for t in reversed(range(T)):     \n",
    "            I = Is[t]\n",
    "            F = Fs[t]\n",
    "            O = Os[t]\n",
    "            C_tilda = C_tildas[t]\n",
    "            H = Hs[t]\n",
    "            X = Xs[t]\n",
    "            C = Cs[t]\n",
    "            H_pre =  Hs[t-1]\n",
    "            C_prev = Cs[t-1]\n",
    "            XH_pre = np.column_stack((X, H_pre))\n",
    "            XH_ = XH_pre\n",
    "\n",
    "            dZ = dZs[t]  \n",
    "\n",
    "            #输出f的模型参数的idu\n",
    "            dWy += np.dot(H.T,dZ)      \n",
    "            dby += np.sum(dZ, axis=0, keepdims=True)   \n",
    "\n",
    "            #隐状态h的梯度\n",
    "            dH = np.dot(dZ, Wy.T) + dH_next       \n",
    "          #  dC = dH_next*O*dtanh(C) +dC_next    #* H = O*np.tanh(C) \n",
    "          #  dC = dH_next*O*(1-np.square(np.tanh(C))) +dC_next\n",
    "            dC = dH*O*dtanh(C) +dC_next \n",
    "\n",
    "            dO = np.tanh(C) *dH \n",
    "            dOZ = O * (1-O)*dO                      \n",
    "            dWo += np.dot(XH_.T,dOZ)\n",
    "            dbo += np.sum(dOZ, axis=0, keepdims=True)              \n",
    "\n",
    "             #di                         \n",
    "            di =  C_tilda*dC\n",
    "            diZ = I*(1-I) * di\n",
    "            dWi += np.dot(XH_.T,diZ)\n",
    "            dbi += np.sum(diZ, axis=0, keepdims=True)  \n",
    "\n",
    "            #df\n",
    "            df = C_prev*dC\n",
    "            dfZ = F*(1-F) * df\n",
    "            dWf += np.dot(XH_.T,dfZ)\n",
    "            dbf += np.sum(dfZ, axis=0, keepdims=True)          \n",
    "\n",
    "            # dC_bar       \n",
    "            dC_tilda = I*dC                         #C = F * C + I * C_tilda        \n",
    "            dC_tilda_Z =(1-np.square(C_tilda))*dC_tilda    # C_tilda = sigmoid(np.dot(XH, Wc)+bc)         \n",
    "            dWc += np.dot(XH_.T,dC_tilda_Z)       \n",
    "            dbc += np.sum(dC_tilda_Z, axis=0, keepdims=True)\n",
    "\n",
    "\n",
    "            dXH_ = (np.dot(dfZ, Wf.T)\n",
    "                 + np.dot(diZ, Wi.T)\n",
    "                 + np.dot(dC_tilda_Z, Wc.T)\n",
    "                 + np.dot(dOZ, Wo.T))\n",
    "\n",
    "            dX_prev = dXH_[:, :input_dim]\n",
    "            dH_prev = dXH_[:, input_dim:]\n",
    "            dC_prev = F * dC\n",
    "\n",
    "            dC_next = dC_prev\n",
    "            dH_next = dH_prev              \n",
    "\n",
    "\n",
    "        #for dparam in [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]:\n",
    "        #    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients\n",
    "        grads = [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]\n",
    "        grad_clipping(grads,5.)\n",
    "        for i,_ in enumerate(self.grads):\n",
    "            self.grads[i]+=grads[i]  \n",
    "    \n",
    "        return [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]\n",
    "    \n",
    "    def parameters(self):\n",
    "        params = []\n",
    "        for i,_ in enumerate(self.params):\n",
    "            params.append([self.params[i],self.grads[i]])\n",
    "        return params\n",
    "\n",
    "import util\n",
    "def rnn_loss_grad_n2n(Fs,Ys,loss_fn = util.loss_gradient_softmax_crossentropy,flatten = True):   #rnn_loss_grad_t): #\n",
    "    loss = 0\n",
    "    dFs = {}\n",
    "    #losses = []\n",
    "    #dFs = []\n",
    "   \n",
    "    for t in range(len(Fs)):\n",
    "        F = Fs[t]\n",
    "        Y = Ys[t]   \n",
    "        if flatten and Y.ndim>=2:\n",
    "            #print(\"ffffffffffff\")\n",
    "            Y = Y.flatten()\n",
    "        loss_t,dF_t = loss_fn(F,Y)\n",
    "        loss += loss_t        \n",
    "        dFs[t] = dF_t\n",
    "        #losses.append(loss_t)\n",
    "        #dFs.append(dF_t)\n",
    "    return loss,dFs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "7.8.2 GRU的代码实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def sigmoid(x):\n",
    "    return 1 / (1 + np.exp(-x))\n",
    "\n",
    "def gru_init_params(input_dim,hidden_dim,output_dim,scale=0.01):\n",
    "    normal = lambda m,n : np.random.randn(m, n)*scale\n",
    "    three = lambda : (normal(input_dim,hidden_dim), normal(hidden_dim,hidden_dim),np.zeros((1,hidden_dim)))\n",
    "    \n",
    "    Wxu, Whu, bu = three()  # Update gate parameter\n",
    "    Wxr, Whr, br = three()  # Reset gate parameter\n",
    "    Wxh, Whh, bh = three()  # Candidate hidden state parameter\n",
    "    \n",
    "    Wy = normal(hidden_dim, output_dim)\n",
    "    by = np.zeros((1,output_dim))\n",
    "    \n",
    "    params = [Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by]\n",
    "    return params\n",
    "\n",
    "def gru_state_init(batch_size, hidden_size):\n",
    "    return np.zeros((batch_size, hidden_size))\n",
    "\n",
    "def gru_forward(params,Xs, H_0):\n",
    "    Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by = params\n",
    "    H = H_0   \n",
    "    Hs = {}\n",
    "    Ys = []\n",
    "    Hs[-1] = np.copy(H)    \n",
    "    Rs = []\n",
    "    Us = [] \n",
    "    H_tildas = [] \n",
    "\n",
    "    for t in range(len(Xs)):\n",
    "        X = Xs[t]\n",
    "        U = sigmoid(np.dot(X, Wxu) + np.dot(H, Whu) + bu)\n",
    "        R = sigmoid(np.dot(X, Wxr) + np.dot(H, Whr) + br)\n",
    "        H_tilda = np.tanh(np.dot(X, Wxh) + np.dot(R * H, Whh) + bh)\n",
    "        H = U * H + (1 - U) * H_tilda\n",
    "        Y = np.dot(H, Wy) + by\n",
    "                \n",
    "        Hs[t] = H\n",
    "        Ys.append(Y)\n",
    "        Rs.append(R)\n",
    "        Us.append(U) \n",
    "        H_tildas.append(H_tilda) \n",
    "                \n",
    "    return Ys,Hs,(Rs,Us,H_tildas)\n",
    "                \n",
    "def gru_backward(params,Xs,Hs,dZs,cache): # Ys,loss_function):\n",
    "    Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by = params\n",
    "    Rs,Us,H_tildas = cache\n",
    "    \n",
    "    dWxu,dWhu,dWxr,dWhr,dWxh,dWhh,dWy  = np.zeros_like(Wxu), np.zeros_like(Whu), np.zeros_like(Wxr), np.zeros_like(Whr)\\\n",
    "                , np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Wy)\n",
    "    dbu,dbr,dbh,dby = np.zeros_like(bu), np.zeros_like(br),  np.zeros_like(bh), np.zeros_like(by)\n",
    "\n",
    "    dH_next = np.zeros_like(Hs[0])\n",
    "       \n",
    "    input_dim = Xs[0].shape[1]  \n",
    " \n",
    "    T = len(Xs)  \n",
    "    for t in reversed(range(T)):     \n",
    "        R = Rs[t]\n",
    "        U = Us[t]      \n",
    "        H = Hs[t]\n",
    "        X = Xs[t]\n",
    "        H_tilda = H_tildas[t]\n",
    "        H_pre =  Hs[t-1]\n",
    "       \n",
    "        dZ = dZs[t]\n",
    "         #输出f的模型参数的梯度\n",
    "        dWy += np.dot(H.T,dZ)      \n",
    "        dby += np.sum(dZ, axis=0, keepdims=True)   \n",
    "        \n",
    "        #隐状态h的梯度\n",
    "        dH = np.dot(dZ, Wy.T) + dH_next       \n",
    "   \n",
    "        #  H =  U H_pre+(1-U)H_tildas\n",
    "        dH_tilda = dH*(1-U)\n",
    "        dH_pre = dH*U\n",
    "        dU = H_pre*dH -H_tilda*dH\n",
    "\n",
    "        # H_tilda = tanh(X Wxh+(R*H_)Whh+bh)\n",
    "        dH_tildaZ = (1-np.square(H_tilda))*dH_tilda\n",
    "        dWxh+= np.dot(X.T,dH_tildaZ)\n",
    "        dWhh+= np.dot((R*H_pre).T,dH_tildaZ)\n",
    "        dbh += np.sum(dH_tildaZ, axis=0, keepdims=True)\n",
    "        \n",
    "        dR = np.dot(dH_tildaZ, Whh.T)*H_pre\n",
    "        dH_pre += np.dot(dH_tildaZ, Whh.T)*R\n",
    "                \n",
    "        # U = \\sigma(UZ)   R = \\sigma(RZ)                \n",
    "        dUZ = U*(1-U)*dU\n",
    "        dRZ = R*(1-R)*dR       \n",
    "       \n",
    "        dH_pre += np.dot(dUZ, Whu.T)\n",
    "        dH_pre += np.dot(dRZ, Whr.T)\n",
    "        \n",
    "        # R = \\sigma(X Wxr+H_ Whr + br)        \n",
    "        dWxr+= np.dot(X.T,dRZ)\n",
    "        dWhr+= np.dot(H_pre.T,dRZ)\n",
    "        dbr += np.sum(dRZ, axis=0, keepdims=True)\n",
    "        \n",
    "        dWxu+= np.dot(X.T,dUZ)\n",
    "        dWhu+= np.dot(H_pre.T,dUZ)\n",
    "        dbu += np.sum(dUZ, axis=0, keepdims=True)\n",
    "        \n",
    "        if True:\n",
    "            dX_RZ = np.dot(dRZ,Wxr.T)\n",
    "            dX_UZ = np.dot(dUZ,Wxu.T)\n",
    "            dX_H_tildaZ = np.dot(dH_tildaZ,Wxh.T)\n",
    "            dX = dX_RZ+dX_UZ+dX_H_tildaZ\n",
    "                \n",
    "        dH_next = dH_pre\n",
    "        \n",
    "    return [dWxu, dWhu, dbu, dWxr, dWhr, dbr, dWxh, dWhh, dbh, dWy,dby]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "用下面的代码检查分析梯度和数值梯度是否一致："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "T  = 3\n",
    "input_dim, hidden_dim,output_dim = 4,3,4\n",
    "batch_size = 1        \n",
    "Xs = np.random.randn(T,batch_size,input_dim)\n",
    "Ys = np.random.randint(output_dim, size=(T,batch_size))\n",
    " \n",
    "\n",
    "print(\"Xs\",Xs)\n",
    "print(\"Ys\",Ys)        \n",
    "        \n",
    "# cheack gradient  \n",
    "params = gru_init_params(input_dim, hidden_dim,output_dim)\n",
    "HC = gru_state_init(batch_size,hidden_dim)\n",
    "\n",
    "Zs,Hs,cache = gru_forward(params,Xs,HC) \n",
    "loss_function = rnn_loss_grad\n",
    "loss,dZs = loss_function(Zs,Ys)  \n",
    "grads = gru_backward(params,Xs,Hs,dZs,cache)\n",
    "\n",
    "def rnn_loss():\n",
    "    HC = gru_state_init(batch_size,hidden_dim)    \n",
    "    Zs,Hs,cache= gru_forward(params,Xs,HC) \n",
    "    loss_function = rnn_loss_grad\n",
    "    loss,dZs = loss_function(Zs,Ys)     \n",
    "    return loss\n",
    "\n",
    "numerical_grads = util.numerical_gradient(rnn_loss,params,1e-6) #rnn_numerical_gradient(rnn_loss,params,1e-10)\n",
    "#diff_error = lambda x, y: np.max( np.abs(x - y)/(np.maximum(1e-8, np.abs(x) + np.abs(y))))\n",
    "diff_error = lambda x, y: np.max( np.abs(x - y))\n",
    "                                 \n",
    "def rel_error(x, y):\n",
    "  \"\"\" returns relative error \"\"\"\n",
    "  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))\n",
    "\n",
    "print(\"loss\",loss)\n",
    "print(\"[Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by] \")\n",
    "for i in range(len(grads)):\n",
    "    print(diff_error(grads[i],numerical_grads[i]))\n",
    "\n",
    "print(\"grads\",grads[0])\n",
    "print(\"numerical_grads\",numerical_grads[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.9 循环神经网络的类表示与实现\n",
    "\n",
    "### 7.9.1  用类实现循环神经网络\n",
    "\n",
    "用一个类LSTM来表示前面的LSTM相关的函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def grad_clipping(grads,alpha):\n",
    "    norm = math.sqrt(sum((grad ** 2).sum() for grad in grads))\n",
    "    if norm > alpha:\n",
    "        ratio = alpha / norm\n",
    "        for i in range(len(grads)):\n",
    "            grads[i]*=ratio \n",
    "            \n",
    "class LSTM(object):\n",
    "    def __init__(self,input_dim,hidden_dim,output_dim,scale=0.01):\n",
    "        #super(LSTM_cell, self).__init__()\n",
    "        self.input_dim,self.hidden_dim,self.output_dim = input_dim,hidden_dim,output_dim\n",
    "        normal = lambda m,n : np.random.randn(m, n)*scale\n",
    "        two = lambda : (normal(input_dim+hidden_dim, hidden_dim),np.zeros((1,hidden_dim)))\n",
    "\n",
    "        Wi, bi = two()  # Input gate parameters\n",
    "        Wf, bf = two()  # Forget gate parameters\n",
    "        Wo, bo = two()  # Output gate parameters\n",
    "        Wc, bc = two()  # Candidate cell parameters\n",
    "\n",
    "        Wy = normal(hidden_dim, output_dim)\n",
    "        by = np.zeros((1,output_dim))\n",
    "\n",
    "        #params = [Wi, bi,Wf, bf, Wo,bo, Wc,bc,Wy,by]\n",
    "        #  return params\n",
    "        self.params = [Wi, bi,Wf, bf, Wo,bo, Wc,bc,Wy,by]\n",
    "        self.grads = [np.zeros_like(param) for param in self.params]\n",
    "        self.H,self.C = None,None\n",
    "      \n",
    "    def reset_state(self,batch_size):\n",
    "        self.H,self.C = (np.zeros((batch_size, self.hidden_dim)),\n",
    "            np.zeros((batch_size, self.hidden_dim)))\n",
    "\n",
    "    def forward(self,Xs):\n",
    "        [Wi, bi,Wf, bf, Wo,bo,Wc,bc,Wy,by] = self.params   \n",
    "\n",
    "        if self.H is None or self.C is None:\n",
    "            self.reset_state(Xs[0].shape[0])  \n",
    "            \n",
    "        H, C =  self.H,self.C\n",
    "        Hs = {}\n",
    "        Cs = {}\n",
    "        Zs = []\n",
    "\n",
    "        Hs[-1] = np.copy(H)\n",
    "        Cs[-1] = np.copy(C)\n",
    "\n",
    "        Is = []\n",
    "        Fs = []\n",
    "        Os = []\n",
    "        C_tildas = []\n",
    "\n",
    "        for t in range(len(Xs)): \n",
    "            X = Xs[t]\n",
    "            XH = np.column_stack((X, H))\n",
    "            \n",
    "            I = sigmoid(np.dot(XH, Wi)+bi)\n",
    "            F = sigmoid(np.dot(XH, Wf)+bf)\n",
    "            O = sigmoid(np.dot(XH, Wo)+bo)\n",
    "            C_tilda = np.tanh(np.dot(XH, Wc)+bc)\n",
    "\n",
    "            C = F * C + I * C_tilda\n",
    "            H = O*np.tanh(C)       #O * C.tanh()  #输出状态 \n",
    "\n",
    "            Y = np.dot(H, Wy) + by        # 输出\n",
    "\n",
    "            Zs.append(Y)\n",
    "            Hs[t] = H\n",
    "            Cs[t] = C\n",
    "\n",
    "            Is.append(I)\n",
    "            Fs.append(F)\n",
    "            Os.append(O)\n",
    "            C_tildas.append(C_tilda)\n",
    "        self.Zs,self.Hs,self.Cs,self.Is,self.Fs,self.Os,self.C_tildas =  Zs,Hs,Cs,Is,Fs,Os,C_tildas\n",
    "        self.Xs  =Xs\n",
    "        \n",
    "        #return Zs,Hs,Cs,(Is,Fs,Os,C_tildas)\n",
    "        return Zs,Hs\n",
    "\n",
    "    def backward(self,dZs): # Ys,loss_function):\n",
    "        [Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by] = self.params\n",
    "\n",
    "        Hs,Cs,Is,Fs,Os,C_tildas = self.Hs,self.Cs,self.Is,self.Fs,self.Os,self.C_tildas\n",
    "        Xs = self.Xs\n",
    "\n",
    "        dWi,dWf,dWo,dWc,dWy  = np.zeros_like(Wi), np.zeros_like(Wf), np.zeros_like(Wo), np.zeros_like(Wc), np.zeros_like(Wy)\n",
    "        dbi,dbf,dbo,dbc,dby = np.zeros_like(bi), np.zeros_like(bf),  np.zeros_like(bo), np.zeros_like(bc), np.zeros_like(by)\n",
    "\n",
    "        dH_next = np.zeros_like(Hs[0])\n",
    "        dC_next = np.zeros_like(Cs[0])\n",
    "\n",
    "        input_dim = Xs[0].shape[1]\n",
    "\n",
    "        h = Hs\n",
    "        x = Xs\n",
    "\n",
    "        T = len(Xs)  \n",
    "        for t in reversed(range(T)):     \n",
    "            I = Is[t]\n",
    "            F = Fs[t]\n",
    "            O = Os[t]\n",
    "            C_tilda = C_tildas[t]\n",
    "            H = Hs[t]\n",
    "            X = Xs[t]\n",
    "            C = Cs[t]\n",
    "            H_pre =  Hs[t-1]\n",
    "            C_prev = Cs[t-1]\n",
    "            XH_pre = np.column_stack((X, H_pre))\n",
    "            XH_ = XH_pre\n",
    "\n",
    "            dZ = dZs[t]  \n",
    "\n",
    "            #输出f的模型参数的idu\n",
    "            dWy += np.dot(H.T,dZ)      \n",
    "            dby += np.sum(dZ, axis=0, keepdims=True)   \n",
    "\n",
    "            #隐状态h的梯度\n",
    "            dH = np.dot(dZ, Wy.T) + dH_next       \n",
    "          #  dC = dH_next*O*dtanh(C) +dC_next    #* H = O*np.tanh(C) \n",
    "          #  dC = dH_next*O*(1-np.square(np.tanh(C))) +dC_next\n",
    "            dC = dH*O*dtanh(C) +dC_next \n",
    "\n",
    "            dO = np.tanh(C) *dH \n",
    "            dOZ = O * (1-O)*dO                      \n",
    "            dWo += np.dot(XH_.T,dOZ)\n",
    "            dbo += np.sum(dOZ, axis=0, keepdims=True)              \n",
    "\n",
    "             #di                         \n",
    "            di =  C_tilda*dC\n",
    "            diZ = I*(1-I) * di\n",
    "            dWi += np.dot(XH_.T,diZ)\n",
    "            dbi += np.sum(diZ, axis=0, keepdims=True)  \n",
    "\n",
    "            #df\n",
    "            df = C_prev*dC\n",
    "            dfZ = F*(1-F) * df\n",
    "            dWf += np.dot(XH_.T,dfZ)\n",
    "            dbf += np.sum(dfZ, axis=0, keepdims=True)          \n",
    "\n",
    "            # dC_bar       \n",
    "            dC_tilda = I*dC                         #C = F * C + I * C_tilda        \n",
    "            dC_tilda_Z =(1-np.square(C_tilda))*dC_tilda    # C_tilda = sigmoid(np.dot(XH, Wc)+bc)         \n",
    "            dWc += np.dot(XH_.T,dC_tilda_Z)       \n",
    "            dbc += np.sum(dC_tilda_Z, axis=0, keepdims=True)\n",
    "\n",
    "\n",
    "            dXH_ = (np.dot(dfZ, Wf.T)\n",
    "                 + np.dot(diZ, Wi.T)\n",
    "                 + np.dot(dC_tilda_Z, Wc.T)\n",
    "                 + np.dot(dOZ, Wo.T))\n",
    "\n",
    "            dX_prev = dXH_[:, :input_dim]\n",
    "            dH_prev = dXH_[:, input_dim:]\n",
    "            dC_prev = F * dC\n",
    "\n",
    "            dC_next = dC_prev\n",
    "            dH_next = dH_prev              \n",
    "\n",
    "\n",
    "        #for dparam in [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]:\n",
    "        #    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients\n",
    "        grads = [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]\n",
    "        grad_clipping(grads,5.)\n",
    "        for i,_ in enumerate(self.grads):\n",
    "            self.grads[i]+=grads[i]  \n",
    "    \n",
    "        return [dWi, dbi,dWf, dbf, dWo,dbo,dWc, dbc,dWy,dby]\n",
    "    \n",
    "    def parameters(self):\n",
    "        return self.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "T  = 3\n",
    "input_dim, hidden_dim,output_dim = 4,3,4\n",
    "batch_size = 2        \n",
    "Xs = np.random.randn(T,batch_size,input_dim)\n",
    "Ys = np.random.randint(output_dim, size=(T,batch_size))\n",
    " \n",
    "print(\"Xs\",Xs)\n",
    "print(\"Ys\",Ys)   \n",
    "\n",
    "lstm = LSTM(input_dim, hidden_dim,output_dim)\n",
    "Zs,Hs = lstm.forward(Xs) \n",
    "\n",
    "loss_function = rnn_loss_grad\n",
    "loss,dZs = loss_function(Zs,Ys)  \n",
    "grads = lstm.backward(dZs)\n",
    "\n",
    "def rnn_loss():\n",
    "    lstm.reset_state(batch_size)    \n",
    "    Zs,Hs = lstm.forward(Xs) \n",
    "    loss_function = rnn_loss_grad\n",
    "    loss,dZs = loss_function(Zs,Ys)     \n",
    "    return loss\n",
    "\n",
    "params = lstm.parameters()\n",
    "numerical_grads = util.numerical_gradient(rnn_loss,params,1e-6) #rnn_numerical_gradient(rnn_loss,params,1e-10)\n",
    "#diff_error = lambda x, y: np.max( np.abs(x - y)/(np.maximum(1e-8, np.abs(x) + np.abs(y))))\n",
    "diff_error = lambda x, y: np.max( np.abs(x - y))\n",
    "\n",
    "print(\"loss\",loss)\n",
    "print(\"[Wi, bi,Wf, bf, Wo,bo,Wc, bc,Wy,by] \")\n",
    "for i in range(len(grads)):\n",
    "    print(diff_error(grads[i],numerical_grads[i]))\n",
    "\n",
    "print(\"grads\",grads[0])\n",
    "print(\"numerical_grads\",numerical_grads[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面的GRU实现了一个GRU结构的循环神经网络："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class GRU(object):\n",
    "    def __init__(self, input_dim,hidden_dim,output_dim,scale=0.01): \n",
    "        super(GRU, self).__init__()\n",
    "        self.input_dim,self.hidden_dim,self.output_dim,self.scale = input_dim,hidden_dim,output_dim,scale\n",
    "        \n",
    "        normal = lambda m,n : np.random.randn(m, n)*scale\n",
    "        three = lambda : (normal(input_dim,hidden_dim), normal(hidden_dim,hidden_dim),np.zeros((1,hidden_dim)))\n",
    "\n",
    "        Wxu, Whu, bu = three()  # Update gate parameter\n",
    "        Wxr, Whr, br = three()  # Reset gate parameter\n",
    "        Wxh, Whh, bh = three()  # Candidate hidden state parameter\n",
    "\n",
    "        Wy = normal(hidden_dim, output_dim)\n",
    "        by = np.zeros((1,output_dim))\n",
    "\n",
    "        self.Wxu, self.Whu, self.bu, self.Wxr, self.Whr, self.br, self.Wxh, self.Whh, self.bh, self.Wy,self.by = Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by\n",
    "        \n",
    "        self.params = [Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by]\n",
    "            # [dWxu, dWhu, dbu, dWxr, dWhr, dbr, dWxh, dWhh, dbh, dWy,dby]\n",
    "        self.grads = [np.zeros_like(param) for param in self.params]\n",
    "        \n",
    "        self.H = None\n",
    "        #params = [Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by] \n",
    "        #return params   \n",
    "\n",
    "    def reset_state(self,batch_size):\n",
    "        self.H = np.zeros((batch_size, self.hidden_dim))       \n",
    "\n",
    "    def forward_step(self,X):\n",
    "        Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by = self.params            \n",
    "        H = self.H # previous state\n",
    "        X = Xs[t]\n",
    "        U = sigmoid(np.dot(X, Wxu) + np.dot(H, Whu) + bu)\n",
    "        R = sigmoid(np.dot(X, Wxr) + np.dot(H, Whr) + br)\n",
    "        H_tilda = np.tanh(np.dot(X, Wxh) + np.dot(R * H, Whh) + bh)\n",
    "        H = U * H + (1 - U) * H_tilda\n",
    "        Y = np.dot(H, Wy) + by\n",
    "\n",
    "        \n",
    "        Hs[t] = H\n",
    "        Ys.append(Y)\n",
    "        Rs.append(R)\n",
    "        Us.append(U) \n",
    "        H_tildas.append(H_tilda)\n",
    "\n",
    "    def forward(self,Xs):\n",
    "        Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by = self.params\n",
    "        if self.H is None:\n",
    "            self.reset_state(Xs[0].shape[0])  \n",
    "        H = self.H   \n",
    "        Hs = {}\n",
    "        Ys = []\n",
    "        Hs[-1] = np.copy(H)    \n",
    "        Rs = []\n",
    "        Us = [] \n",
    "        H_tildas = [] \n",
    "\n",
    "        for t in range(len(Xs)):\n",
    "            X = Xs[t]\n",
    "            U = sigmoid(np.dot(X, Wxu) + np.dot(H, Whu) + bu)\n",
    "            R = sigmoid(np.dot(X, Wxr) + np.dot(H, Whr) + br)\n",
    "            H_tilda = np.tanh(np.dot(X, Wxh) + np.dot(R * H, Whh) + bh)\n",
    "            H = U * H + (1 - U) * H_tilda\n",
    "            Y = np.dot(H, Wy) + by\n",
    "\n",
    "            Hs[t] = H\n",
    "            Ys.append(Y)\n",
    "            Rs.append(R)\n",
    "            Us.append(U) \n",
    "            H_tildas.append(H_tilda)\n",
    "        \n",
    "        self.Ys,self.Hs,self.Rs,self.Us,self.H_tildas = Ys,Hs,Rs,Us,H_tildas\n",
    "        return Ys,Hs\n",
    "        #return Ys,Hs,(Rs,Us,H_tildas)\n",
    "\n",
    "    def backward(self,dZs): # Ys,loss_function):\n",
    "        Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by = self.params\n",
    "        Ys,Hs,Rs,Us,H_tildas = self.Ys,self.Hs,self.Rs,self.Us,self.H_tildas\n",
    "        \n",
    "\n",
    "        dWxu,dWhu,dWxr,dWhr,dWxh,dWhh,dWy  = np.zeros_like(Wxu), np.zeros_like(Whu), np.zeros_like(Wxr), np.zeros_like(Whr)\\\n",
    "                    , np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Wy)        \n",
    "        dbu,dbr,dbh,dby = np.zeros_like(bu), np.zeros_like(br),  np.zeros_like(bh), np.zeros_like(by)\n",
    "\n",
    "        dH_next = np.zeros_like(Hs[0])\n",
    "\n",
    "        input_dim = Xs[0].shape[1]  \n",
    "\n",
    "        T = len(Xs)  \n",
    "        for t in reversed(range(T)):     \n",
    "            R = Rs[t]\n",
    "            U = Us[t]      \n",
    "            H = Hs[t]\n",
    "            X = Xs[t]\n",
    "            H_tilda = H_tildas[t]\n",
    "            H_pre =  Hs[t-1]\n",
    "\n",
    "            dZ = dZs[t]\n",
    "             #输出f的模型参数的idu\n",
    "            dWy += np.dot(H.T,dZ)      \n",
    "            dby += np.sum(dZ, axis=0, keepdims=True)   \n",
    "\n",
    "            #隐状态h的梯度\n",
    "            dH = np.dot(dZ, Wy.T) + dH_next       \n",
    "\n",
    "            #  H =  U H_pre+(1-U)H_tildas\n",
    "            dH_tilda = dH*(1-U)\n",
    "            dH_pre = dH*U\n",
    "            dU = H_pre*dH -H_tilda*dH\n",
    "\n",
    "            # H_tilda = tanh(X Wxh+(R*H_)Whh+bh)\n",
    "            dH_tildaZ = (1-np.square(H_tilda))*dH_tilda\n",
    "            dWxh+= np.dot(X.T,dH_tildaZ)\n",
    "            dWhh+= np.dot((R*H_pre).T,dH_tildaZ)\n",
    "            dbh += np.sum(dH_tildaZ, axis=0, keepdims=True)\n",
    "\n",
    "            dR = np.dot(dH_tildaZ, Whh.T)*H_pre\n",
    "            dH_pre += np.dot(dH_tildaZ, Whh.T)*R\n",
    "\n",
    "            # U = \\sigma(UZ)   R = \\sigma(RZ)                \n",
    "            dUZ = U*(1-U)*dU\n",
    "            dRZ = R*(1-R)*dR       \n",
    "\n",
    "            dH_pre += np.dot(dUZ, Whu.T)\n",
    "            dH_pre += np.dot(dRZ, Whr.T)\n",
    "\n",
    "            # R = \\sigma(X Wxr+H_ Whr + br)        \n",
    "            dWxr+= np.dot(X.T,dRZ)\n",
    "            dWhr+= np.dot(H_pre.T,dRZ)\n",
    "            dbr += np.sum(dRZ, axis=0, keepdims=True)\n",
    "\n",
    "            dWxu+= np.dot(X.T,dUZ)\n",
    "            dWhu+= np.dot(H_pre.T,dUZ)\n",
    "            dbu += np.sum(dUZ, axis=0, keepdims=True)\n",
    "\n",
    "            if True:\n",
    "                dX_RZ = np.dot(dRZ,Wxr.T)\n",
    "                dX_UZ = np.dot(dUZ,Wxu.T)\n",
    "                dX_H_tildaZ = np.dot(dH_tildaZ,Wxh.T)\n",
    "                dX = dX_RZ+dX_UZ+dX_H_tildaZ\n",
    "            \n",
    "            dH_next = dH_pre\n",
    "        \n",
    "        grads = [dWxu, dWhu, dbu, dWxr, dWhr, dbr, dWxh, dWhh, dbh, dWy,dby]\n",
    "        for i,_ in enumerate(self.grads):\n",
    "            self.grads[i]+=grads[i]\n",
    "         \n",
    "        return self.grads\n",
    "        #return [dWxu, dWhu, dbu, dWxr, dWhr, dbr, dWxh, dWhh, dbh, dWy,dby]\n",
    "    \n",
    "\n",
    "    def get_states(self):\n",
    "        return self.Hs  \n",
    "\n",
    "    def get_outputs(self):\n",
    "        return self.Ys\n",
    "    \n",
    "    def parameters(self):\n",
    "        return self.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gradient check for the GRU: compare the analytic gradients returned by\n",
    "# backward() with numerical gradients of the same loss.\n",
    "T  = 3\n",
    "input_dim, hidden_dim,output_dim = 4,3,4\n",
    "batch_size = 2        \n",
    "Xs = np.random.randn(T,batch_size,input_dim)\n",
    "Ys = np.random.randint(output_dim, size=(T,batch_size))\n",
    " \n",
    "print(\"Xs\",Xs)\n",
    "print(\"Ys\",Ys)   \n",
    "\n",
    "gru = GRU(input_dim, hidden_dim,output_dim)\n",
    "Zs,Hs = gru.forward(Xs) \n",
    "\n",
    "loss_function = rnn_loss_grad\n",
    "loss,dZs = loss_function(Zs,Ys)  \n",
    "grads = gru.backward(dZs)\n",
    "\n",
    "def rnn_loss():\n",
    "    \"\"\"Recompute the loss of `gru` on (Xs, Ys); called repeatedly by the numerical gradient.\"\"\"\n",
    "    # Reset the state of the GRU under test before every evaluation. This line\n",
    "    # used to reset an unrelated `lstm` object (copy-paste slip), so `gru` was\n",
    "    # never reset between evaluations.\n",
    "    # NOTE(review): assumes GRU exposes reset_state(batch_size) like the LSTM class -- confirm.\n",
    "    gru.reset_state(batch_size)    \n",
    "    Zs,Hs = gru.forward(Xs) \n",
    "    loss_function = rnn_loss_grad\n",
    "    loss,dZs = loss_function(Zs,Ys)     \n",
    "    return loss\n",
    "\n",
    "params = gru.parameters()\n",
    "numerical_grads = util.numerical_gradient(rnn_loss,params,1e-6) #rnn_numerical_gradient(rnn_loss,params,1e-10)\n",
    "# Alternative (relative) error measure:\n",
    "#diff_error = lambda x, y: np.max( np.abs(x - y)/(np.maximum(1e-8, np.abs(x) + np.abs(y))))\n",
    "# Largest absolute element-wise difference between two gradient arrays.\n",
    "diff_error = lambda x, y: np.max( np.abs(x - y))\n",
    "\n",
    "print(\"loss\",loss)\n",
    "print(\"[Wxu, Whu, bu, Wxr, Whr, br, Wxh, Whh, bh, Wy,by] \")\n",
    "for i in range(len(grads)):  \n",
    "    print(diff_error(grads[i],numerical_grads[i]))\n",
    "\n",
    "print(\"grads\",grads[0])\n",
    "print(\"numerical_grads\",numerical_grads[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.9.2 循环神经网络单元的类实现\n",
    "\n",
    "循环神经网络最基本的计算是神经网络单元在某个时刻的正向和反向计算。在某个时刻，神经网络单元接受数据输入$x$和上一个时间步的状态输入$h$：对于简单RNN和GRU，输出的是当前时间步的状态$h'$；对于LSTM，输出的是当前记忆存储$c'$和传入下一时间步的$h'$。例如，对简单的RNN，其正向计算公式是：\n",
    "\n",
    "$$h' = \\tanh(W_{ih} x + b_{ih}  +  W_{hh} h + b_{hh}) \\tag{7-39}$$\n",
    "\n",
    "这里将原来的偏置$b_h$拆成了2项：$b_{ih},b_{hh}$。分别表示数据输入加权和的偏置和隐状态加权和的偏置。同样的，对于LSTM，也可以将原来的每个加权和的一个偏置拆成2个偏置，即LSTM的计算公式：\n",
    "\n",
    "$$\\begin{array}{ll}\n",
    "        i = \\sigma(W_{ii} x + b_{ii} + W_{hi} h + b_{hi}) \\\\\n",
    "        f = \\sigma(W_{if} x + b_{if} + W_{hf} h + b_{hf}) \\\\\n",
    "        g = \\tanh(W_{ig} x + b_{ig} + W_{hg} h + b_{hg}) \\\\\n",
    "        o = \\sigma(W_{io} x + b_{io} + W_{ho} h + b_{ho}) \\\\\n",
    "        c' = f * c + i * g \\\\\n",
    "        h' = o * \\tanh(c') \\\\\n",
    "        \\end{array} \\tag{7-40} $$\n",
    "\n",
    "类似的，GRU神经网络单元的计算公式为：\n",
    "\n",
    "$$\\begin{array}{ll}\n",
    "        r = \\sigma(W_{ir} x + b_{ir} + W_{hr} h + b_{hr}) \\\\\n",
    "        z = \\sigma(W_{iz} x + b_{iz} + W_{hz} h + b_{hz}) \\\\\n",
    "        n = \\tanh(W_{in} x + b_{in} + r * (W_{hn} h + b_{hn})) \\\\\n",
    "        h' = (1 - z) * n + z * h\n",
    "        \\end{array} \\tag{7-41} $$\n",
    "可以用一个公共的基类表示这3个不同神经网络单元的公共属性："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import math \n",
    "class RNNCellBase(object):\n",
    "    __constants__ = ['input_size', 'hidden_size']\n",
    "    def __init__(self, input_size, hidden_size,bias, num_chunks):\n",
    "        super(RNNCellBase, self).__init__()        \n",
    "        self.input_size, self.hidden_size = input_size, hidden_size\n",
    "        self.bias = bias\n",
    "        self.W_ih= np.empty((input_size, num_chunks*hidden_size))   # input to hidden\n",
    "        self.W_hh = np.empty((hidden_size, num_chunks*hidden_size))  # hidden to hidden\n",
    "        if bias:\n",
    "            self.b_ih = np.zeros((1,num_chunks*hidden_size))\n",
    "            self.b_hh = np.zeros((1,num_chunks*hidden_size))\n",
    "            self.params = [self.W_ih,self.W_hh,self.b_ih,self.b_hh]\n",
    "        else:\n",
    "            self.b_ih = None\n",
    "            self.b_hh = None\n",
    "            self.params = [self.W_ih,self.W_hh]      \n",
    "        \n",
    "        self.grads = [np.zeros_like(param)for param in self.params]\n",
    "        self.param_grads = self.params.copy()\n",
    "        self.param_grads.extend(self.grads)\n",
    "        \n",
    "        self.reset_parameters()\n",
    "      \n",
    "    def parameters(self,no_grad = True):\n",
    "        if no_grad:   return self.params;  \n",
    "        return self.param_grads;            \n",
    "            \n",
    "    def reset_parameters(self):\n",
    "        stdv = 1.0 / math.sqrt(self.hidden_size)\n",
    "        for param in self.params:\n",
    "            w = param\n",
    "            w[:] = np.random.uniform(-stdv, stdv,(w.shape))\n",
    "            \n",
    "    def check_forward_input(self, input):\n",
    "        if input.shape[1] != self.input_size:\n",
    "            raise RuntimeError(\n",
    "                \"input has inconsistent input_size: got {}, expected {}\".format(\n",
    "                    input.shape[1], self.input_size))\n",
    "\n",
    "    def check_forward_hidden(self, input, h, hidden_label=''):      \n",
    "        if input.shape[0] != h.shape[0]:\n",
    "            raise RuntimeError(\n",
    "                \"Input batch size {} doesn't match hidden{} batch size {}\".format(\n",
    "                    input.shape[0], hidden_label, h.shape[0]))\n",
    "\n",
    "        if h.shape[1] != self.hidden_size:\n",
    "            raise RuntimeError(\n",
    "                \"hidden{} has inconsistent hidden_size: got {}, expected {}\".format(\n",
    "                    hidden_label, h.shape[1], self.hidden_size)) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面的代码定义了表示简单RNN单元的类RNNCell："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def relu(x):\n",
    "    return x * (x > 0)\n",
    "\n",
    "def rnn_tanh_cell(x, h,W_ih, W_hh,b_ih, b_hh):\n",
    "    #h' = \\tanh(W_{ih} x + b_{ih}  +  W_{hh} h + b_{hh})\n",
    "    if b_ih is None:        \n",
    "        return np.tanh(np.dot(x,W_ih) +  np.dot(h,W_hh))\n",
    "    else:\n",
    "        return np.tanh(np.dot(x,W_ih) + b_ih  +  np.dot(h,W_hh) + b_hh)   \n",
    "    \n",
    "def rnn_relu_cell(x, h,W_ih,W_hh,b_ih, b_hh):\n",
    "    \"\"\"One ReLU RNN step: h' = relu(x W_ih + b_ih + h W_hh + b_hh).\n",
    "\n",
    "    Both biases are skipped when `b_ih` is None.\n",
    "    \"\"\"\n",
    "    use_bias = b_ih is not None\n",
    "    # Terms are accumulated left to right in the order of the formula.\n",
    "    pre_activation = np.dot(x,W_ih)\n",
    "    if use_bias:\n",
    "        pre_activation = pre_activation + b_ih\n",
    "    pre_activation = pre_activation + np.dot(h,W_hh)\n",
    "    if use_bias:\n",
    "        pre_activation = pre_activation + b_hh\n",
    "    return relu(pre_activation)\n",
    "    \n",
    "class RNNCell(RNNCellBase):\n",
    "    \"\"\"Single-step simple (Elman) RNN cell.\n",
    "\n",
    "    Computes h' = act(x W_ih + b_ih + h W_hh + b_hh), where act is tanh\n",
    "    (nonlinearity=\"tanh\", the default) or ReLU (nonlinearity=\"relu\").\n",
    "    \"\"\"\n",
    "    __constants__ = ['input_size', 'hidden_size',  'nonlinearity']\n",
    "\n",
    "    def __init__(self, input_size, hidden_size,bias=True, nonlinearity=\"tanh\"):\n",
    "        super(RNNCell, self).__init__(input_size, hidden_size,bias,num_chunks=1)\n",
    "        self.nonlinearity = nonlinearity\n",
    "\n",
    "    def forward(self, input, h=None):\n",
    "        \"\"\"Return the next hidden state for `input` (batch, input_size) and `h` (batch, hidden_size).\n",
    "\n",
    "        `h` defaults to zeros. Raises RuntimeError on a shape mismatch or an\n",
    "        unknown nonlinearity.\n",
    "        \"\"\"\n",
    "        self.check_forward_input(input)\n",
    "        if h is None:\n",
    "            # np.zeros takes the shape as ONE tuple; passing two positional\n",
    "            # integers (as before) raised a TypeError.\n",
    "            h = np.zeros((input.shape[0], self.hidden_size), dtype=input.dtype)\n",
    "        self.check_forward_hidden(input, h, '')\n",
    "        if self.nonlinearity == \"tanh\":\n",
    "            return rnn_tanh_cell( input, h,\n",
    "                self.W_ih, self.W_hh,\n",
    "                self.b_ih, self.b_hh,)\n",
    "        if self.nonlinearity == \"relu\":\n",
    "            return rnn_relu_cell( input, h,\n",
    "                self.W_ih, self.W_hh,\n",
    "                self.b_ih, self.b_hh,)\n",
    "        raise RuntimeError(\n",
    "            \"Unknown nonlinearity: {}\".format(self.nonlinearity))\n",
    "\n",
    "    def __call__(self, input, h=None):\n",
    "        return self.forward(input,h)\n",
    "\n",
    "    def backward(self,dh,H,X,H_pre):\n",
    "        \"\"\"Backpropagate through one step.\n",
    "\n",
    "        dh    -- gradient of the loss w.r.t. this step's output H\n",
    "        H     -- the output produced by forward for this step\n",
    "        X     -- the step's input\n",
    "        H_pre -- the hidden state that was fed into the step\n",
    "\n",
    "        Returns (dx, dh_pre, grads) with grads = (dW_ih, dW_hh, db_ih, db_hh),\n",
    "        and accumulates grads into self.grads.\n",
    "        \"\"\"\n",
    "        if self.nonlinearity == \"tanh\":\n",
    "            dZh = (1 - H * H) * dh   # d tanh(z)/dz = 1 - tanh(z)^2\n",
    "        else:\n",
    "            # ReLU passes the gradient only where its output is positive.\n",
    "            # (The previous H*(1-H) was the sigmoid derivative.)\n",
    "            dZh = (H > 0) * dh\n",
    "        db_hh = np.sum(dZh, axis=0, keepdims=True)\n",
    "        db_ih = np.sum(dZh, axis=0, keepdims=True)\n",
    "        dW_ih = np.dot(X.T,dZh)\n",
    "        dW_hh = np.dot(H_pre.T,dZh)\n",
    "        dh_pre = np.dot(dZh,self.W_hh.T)\n",
    "        dx =  np.dot(dZh,self.W_ih.T)\n",
    "        grads = (dW_ih,dW_hh,db_ih,db_hh)\n",
    "        # In-place accumulation; without biases self.grads has only two\n",
    "        # entries and zip stops after the two weight gradients.\n",
    "        for a, b in zip(self.grads,grads):\n",
    "            a+=b\n",
    "        return dx,dh_pre,grads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面代码示范了RNNCell的一个时间步的正向和反向计算，其中x是批大小为3的输入数据，而h是对应批大小为3的状态："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "h_: [[ 0.42816071 -0.93391142 -0.09318246  0.36981243  0.58315586  0.58441657\n",
      "  -0.64776787 -0.24944403 -0.13190592  0.31128476  0.55425403 -0.1986646\n",
      "   0.52432852 -0.60473608  0.43623263 -0.48959648 -0.71410221  0.70237457\n",
      "  -0.73622656  0.01598786]\n",
      " [ 0.72310225 -0.5286474   0.26030019 -0.013299    0.40588113  0.44826108\n",
      "   0.54452866 -0.52664533  0.03804185 -0.54873226  0.36711887 -0.05333279\n",
      "   0.60378345 -0.7142368  -0.42034846 -0.70053035 -0.71392327 -0.37169453\n",
      "  -0.07726286  0.45857011]\n",
      " [ 0.82057664 -0.75346798  0.09852887  0.0913517  -0.14855558  0.22773312\n",
      "  -0.07206941 -0.11684849 -0.63141815  0.77970711 -0.28520141  0.52957509\n",
      "  -0.38418741 -0.10004893 -0.31241743  0.25673323 -0.28579384  0.7648889\n",
      "  -0.61030835 -0.43727677]]\n",
      "dh: [[-0.24924055 -0.60182045 -0.46673386  0.27244745 -0.71112983 -0.1528452\n",
      "   0.02957395 -0.14058541 -0.55421028 -0.53787714 -0.26997871 -0.3888154\n",
      "  -0.27366057  0.73394301  0.76371757 -0.32362572 -0.72360284  0.0827917\n",
      "  -0.10417667 -0.06167717]\n",
      " [-0.01478511  0.11647635  0.65959522  0.03227447  0.56160275  0.42892556\n",
      "  -0.02010594  0.61050929 -0.04549744 -0.05391427  0.51613862  0.04445541\n",
      "  -0.05494767  0.35450059 -1.03299634 -0.07851641  0.85444196 -0.0639933\n",
      "  -0.13658307  0.01579437]\n",
      " [-0.35543278  0.36188807 -0.76171418  0.29340666  0.22050786  0.05230421\n",
      "  -0.04009951  0.05390205  0.33079738 -0.3180523  -0.6626584  -0.02439556\n",
      "  -0.24605205  0.03644055  0.40583728 -0.29546359 -0.51967787 -0.34771026\n",
      "   0.39365535 -0.22932589]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "# Fix the seed so the printed values are reproducible.\n",
    "np.random.seed(1)\n",
    "x = np.random.randn(3, 10)  #(batch_size,input_dim)\n",
    "h = np.random.randn(3, 20)  #(batch_size,hidden_dim)\n",
    "rnn = RNNCell(10, 20)       #(input_dim,hidden_dim)\n",
    "\n",
    "# Forward: one time step, h_ is the new hidden state.\n",
    "h_ = rnn(x, h)\n",
    "print(\"h_:\",h_)\n",
    "# Backward: dh_ is a random stand-in for the gradient w.r.t. h_.\n",
    "dh_ = np.random.randn(*h.shape)\n",
    "# dx and dh are the gradients w.r.t. the input x and the previous state h.\n",
    "dx,dh,_ = rnn.backward(dh_,h_,x,h)\n",
    "print(\"dh:\",dh)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面的代码用长度为6（即6个时间步）的序列数据x，演示了RNNCell沿时间步的正向和反向计算过程："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "h: [[ 0.02591013  0.55596904  0.88776786 -0.30259903 -0.52486047  0.52424018\n",
      "  -0.27195195 -0.39829933  0.26264842 -0.41462555  0.85312201 -0.07127092\n",
      "   0.5249446   0.56521169 -0.96605763 -0.56399649 -0.42170925  0.51792634\n",
      "  -0.48083172  0.8081585 ]\n",
      " [-0.41431091 -0.19629144  0.07812999  0.60036895 -0.66809188  0.01850902\n",
      "  -0.13318301  0.05429666 -0.52273399  0.0511763   0.6836469  -0.49816267\n",
      "  -0.65907586  0.06392608 -0.21453917  0.64064104 -0.71027156 -0.65742462\n",
      "   0.85250721  0.7947783 ]\n",
      " [ 0.0963808  -0.0801268  -0.29529377 -0.15654982 -0.26685841  0.42867349\n",
      "   0.10294473 -0.33910771  0.08601856 -0.208398    0.12351004 -0.21096505\n",
      "  -0.03701162  0.24458415  0.02940568  0.07932559 -0.18343557  0.32865606\n",
      "   0.60922849  0.71300271]]\n",
      "dh: [[-0.00019024  0.0064997   0.00700269  0.00023613 -0.00990561 -0.00226841\n",
      "   0.0012073   0.00569657 -0.00382987  0.00562598 -0.00281138 -0.00601363\n",
      "  -0.00523546 -0.00048767  0.00336776  0.0004302   0.00309408 -0.00344074\n",
      "  -0.00433494  0.00014678]\n",
      " [ 0.02580657  0.04296269  0.00574297 -0.01487176 -0.01743256  0.03280786\n",
      "  -0.01896186 -0.01698244  0.0395379   0.01565118 -0.0044575   0.01083449\n",
      "  -0.00330879  0.05841741  0.00425834  0.02722163 -0.01025379  0.02958938\n",
      "   0.03793763 -0.01245477]\n",
      " [ 0.02612048  0.02278542 -0.00309881 -0.03783893 -0.01705496  0.02166877\n",
      "  -0.00804965  0.00863808 -0.00988581  0.02456897 -0.00800725 -0.00319803\n",
      "  -0.01548033 -0.00579467 -0.00242898  0.00414448  0.0034383   0.00126782\n",
      "   0.00624222 -0.0068162 ]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "x = np.random.randn(6, 3, 10)   # (seq_len, batch_size, input_dim)\n",
    "h = np.random.randn(3, 20)      # (batch_size, hidden_dim)\n",
    "rnn = RNNCell(10, 20)\n",
    "\n",
    "# Forward: feed the six steps in order, keeping every hidden state.\n",
    "h_0 = h.copy()\n",
    "hs = []\n",
    "for x_t in x:\n",
    "    h = rnn(x_t, h)\n",
    "    hs.append(h)\n",
    "print(\"h:\",hs[0])\n",
    "\n",
    "# Backward through time: start from a random gradient on the last state;\n",
    "# step i used hs[i-1] as its previous state, and step 0 used h_0.\n",
    "dh = np.random.randn(*h.shape)\n",
    "for i in reversed(range(6)):\n",
    "    h_pre = h_0 if i==0 else hs[i-1]\n",
    "    dx,dh,_ = rnn.backward(dh,hs[i],x[i],h_pre)\n",
    "print(\"dh:\",dh)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "同样的，可以定义LSTM和GRU类型的循环神经网络单元LSTMCell和 GRUCell。LSTMCell的代码如下："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sigmoid(x):\n",
    "    return (1 / (1 + np.exp(-x)))\n",
    "def lstm_cell(x, hc,w_ih, w_hh,b_ih, b_hh): \n",
    "    \"\"\"One LSTM time step.\n",
    "\n",
    "    x    -- input, (batch, input_size)\n",
    "    hc   -- pair (h, c): previous hidden and cell state\n",
    "    w_ih -- (input_size, 4*hidden_size), gates packed column-wise as i, f, g, o\n",
    "    w_hh -- (hidden_size, 4*hidden_size), same packing\n",
    "    b_ih, b_hh -- biases broadcast over the batch, or both None for a cell built with bias=False\n",
    "\n",
    "    Returns ((h_, c_), ifgo): the new states and the column-stacked gate\n",
    "    activations that lstm_cell_back needs.\n",
    "    \"\"\"\n",
    "    h,c = hc[0],hc[1]\n",
    "    hidden_size = w_ih.shape[1]//4\n",
    "    if b_ih is None:\n",
    "        # Cells without bias store None; adding None to an array raises TypeError.\n",
    "        ifgo_Z = np.dot(x,w_ih) + np.dot(h,w_hh)\n",
    "    else:\n",
    "        ifgo_Z = np.dot(x,w_ih) + b_ih  +  np.dot(h,w_hh) + b_hh\n",
    "    i = sigmoid(ifgo_Z[:,:hidden_size])\n",
    "    f = sigmoid(ifgo_Z[:,hidden_size:2*hidden_size])\n",
    "    g = np.tanh(ifgo_Z[:,2*hidden_size:3*hidden_size])\n",
    "    o = sigmoid(ifgo_Z[:,3*hidden_size:])   \n",
    "    c_ = f*c+i*g\n",
    "    h_ = o*np.tanh(c_)\n",
    "    return (h_,c_),np.column_stack((i,f,g,o))\n",
    "\n",
    "def lstm_cell_back(dhc,ifgo,x,hc_pre,w_ih, w_hh,b_ih, b_hh):\n",
    "    hidden_size = w_ih.shape[1]//4\n",
    "    if isinstance(dhc, tuple):\n",
    "        dh_,dc_next = dhc\n",
    "    else:\n",
    "        dh_ = dhc\n",
    "        dc_next = np.zeros_like(dh_)\n",
    "    h_pre,c = hc_pre\n",
    "    i,f,g,o = ifgo[:,:hidden_size],ifgo[:,hidden_size:2*hidden_size]\\\n",
    "              , ifgo[:,2*hidden_size:3*hidden_size],ifgo[:,3*hidden_size:]\n",
    "    c_ = f*c+i*g\n",
    "    dc_ = dc_next+dh_*o*(1-np.square(np.tanh(c_)))\n",
    "    do = dh_*np.tanh(c_)\n",
    "    di = dc_*g\n",
    "    dg = dc_*i\n",
    "    df = dc_*c\n",
    "    \n",
    "    diz = i*(1-i)*di\n",
    "    dfz = f*(1-f)*df\n",
    "    dgz = (1-np.square(g))*dg\n",
    "    doz = o*(1-o)*do\n",
    "    \n",
    "    dZ = np.column_stack((diz,dfz,dgz,doz))\n",
    "    \n",
    "    dW_ih = np.dot(x.T,dZ)\n",
    "    dW_hh = np.dot(h_pre.T,dZ)\n",
    "    db_hh = np.sum(dZ, axis=0, keepdims=True) \n",
    "    db_ih = np.sum(dZ, axis=0, keepdims=True) \n",
    "    dx =  np.dot(dZ,w_ih.T)\n",
    "    dh_pre = np.dot(dZ,w_hh.T)\n",
    "    #return dx,dh_pre,(dW_ih,dW_hh,db_ih,db_hh)\n",
    "    dc = dc_*f\n",
    "    return dx,(dh_pre,dc),(dW_ih,dW_hh,db_ih,db_hh)\n",
    "\n",
    "class LSTMCell(RNNCellBase):\n",
    "    \"\"\"Single-step LSTM cell.\n",
    "\n",
    "        i  = sigmoid(x W_ii + b_ii + h W_hi + b_hi)\n",
    "        f  = sigmoid(x W_if + b_if + h W_hf + b_hf)\n",
    "        g  = tanh   (x W_ig + b_ig + h W_hg + b_hg)\n",
    "        o  = sigmoid(x W_io + b_io + h W_ho + b_ho)\n",
    "        c' = f * c + i * g\n",
    "        h' = o * tanh(c')\n",
    "\n",
    "    The four gates are packed column-wise (order i, f, g, o) into W_ih,\n",
    "    W_hh, b_ih and b_hh.\n",
    "\n",
    "    Inputs: input, (h_0, c_0)\n",
    "        input is (batch, input_size); h_0 and c_0 are (batch, hidden_size)\n",
    "        and both default to zeros when the pair is not provided.\n",
    "\n",
    "    Outputs: ((h_1, c_1), ifgo)\n",
    "        h_1, c_1 -- next hidden and cell state, each (batch, hidden_size)\n",
    "        ifgo     -- column-stacked gate activations; pass it to backward()\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_size, hidden_size, bias=True):\n",
    "        super(LSTMCell, self).__init__(input_size, hidden_size,bias, num_chunks=4)\n",
    "\n",
    "    def init_hidden(self, batch_size, dtype=float):\n",
    "        \"\"\"Return a zero (h_0, c_0) pair, each of shape (batch_size, hidden_size).\"\"\"\n",
    "        # Previously this lacked `self`, read the builtin `input` and passed the\n",
    "        # shape to np.zeros as two positional arguments, so it could never run.\n",
    "        shape = (batch_size, self.hidden_size)\n",
    "        # Two separate arrays so h_0 and c_0 never alias each other.\n",
    "        return (np.zeros(shape, dtype=dtype), np.zeros(shape, dtype=dtype))\n",
    "\n",
    "    def forward(self, input, h=None):\n",
    "        \"\"\"Run one step; `h` is the (h_0, c_0) pair and defaults to zeros.\"\"\"\n",
    "        self.check_forward_input(input)\n",
    "        if h is None:\n",
    "            h = self.init_hidden(input.shape[0], input.dtype)\n",
    "        self.check_forward_hidden(input, h[0], '[0]')\n",
    "        self.check_forward_hidden(input, h[1], '[1]')\n",
    "        return lstm_cell(\n",
    "                input, h,\n",
    "                self.W_ih, self.W_hh,\n",
    "                self.b_ih, self.b_hh,\n",
    "            )\n",
    "\n",
    "    def __call__(self, input, h=None):\n",
    "        return self.forward(input,h)\n",
    "\n",
    "    def backward(self, dhc,ifgo,input,hc_pre):\n",
    "        \"\"\"Backpropagate through one step and accumulate into self.grads.\n",
    "\n",
    "        dhc    -- gradient w.r.t. h', or a tuple (dh', dc')\n",
    "        ifgo   -- gate activations returned by forward for this step\n",
    "        input  -- the step's input\n",
    "        hc_pre -- the (h, c) pair fed into the step; zeros when None\n",
    "\n",
    "        Returns (dx, (dh_pre, dc_pre), grads) with\n",
    "        grads = (dW_ih, dW_hh, db_ih, db_hh).\n",
    "        \"\"\"\n",
    "        if hc_pre is None:\n",
    "            hc_pre = self.init_hidden(input.shape[0], input.dtype)\n",
    "        dx,dh_pre,grads = lstm_cell_back(\n",
    "                            dhc,ifgo,\n",
    "                            input, hc_pre,\n",
    "                            self.W_ih, self.W_hh,\n",
    "                            self.b_ih, self.b_hh)\n",
    "\n",
    "        # In-place accumulation; without biases self.grads has only two\n",
    "        # entries and zip stops after the two weight gradients.\n",
    "        for a, b in zip(self.grads,grads):\n",
    "            a+=b\n",
    "        return dx,dh_pre,grads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "GRUCell的代码如下："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gru_cell(x, h,w_ih, w_hh,b_ih, b_hh):\n",
    "    \"\"\"One GRU time step.\n",
    "\n",
    "        r  = sigmoid(x W_ir + b_ir + h W_hr + b_hr)        reset gate\n",
    "        u  = sigmoid(x W_iu + b_iu + h W_hu + b_hu)        update gate\n",
    "        n  = tanh(x W_in + b_in + r * (h W_hn + b_hn))     candidate state\n",
    "        h' = u * h + (1 - u) * n\n",
    "\n",
    "    w_ih / w_hh pack the three blocks column-wise in the order r, u, n.\n",
    "    b_ih / b_hh are broadcast over the batch, or both None for a cell built\n",
    "    with bias=False.\n",
    "\n",
    "    Returns (h_next, run) where run column-stacks r, u and n for gru_cell_back.\n",
    "    \"\"\"\n",
    "    if b_ih is None:\n",
    "        # Cells without bias store None; adding None to an array raises TypeError.\n",
    "        Z_ih,Z_hh = np.dot(x,w_ih), np.dot(h,w_hh)\n",
    "    else:\n",
    "        Z_ih,Z_hh = np.dot(x,w_ih) + b_ih, np.dot(h,w_hh) + b_hh\n",
    "    hidden_size = w_ih.shape[1]//3\n",
    "    r = sigmoid(Z_ih[:,:hidden_size]+Z_hh[:,:hidden_size])\n",
    "    u = sigmoid(Z_ih[:,hidden_size:2*hidden_size]+Z_hh[:,hidden_size:2*hidden_size]) \n",
    "    n = np.tanh(Z_ih[:,2*hidden_size:]+r*Z_hh[:,2*hidden_size:]) \n",
    "    h_next= u*h+(1-u)*n \n",
    "    run = np.column_stack((r,u,n))\n",
    "    return h_next,run \n",
    "\n",
    "def gru_cell_back(dh,run,x,h_pre,w_ih, w_hh,b_ih, b_hh):\n",
    "    hidden_size = w_ih.shape[1]//3\n",
    "    #r,u,n = run\n",
    "    r,u,n = run[:,:hidden_size],run[:,hidden_size:2*hidden_size]\\\n",
    "              , run[:,2*hidden_size:]\n",
    "              \n",
    "    #  H =  U H_pre+(1-U)H_tildas\n",
    "    dn = dh*(1-u)\n",
    "    dh_pre = dh*u\n",
    "    du = h_pre*dh -n*dh   \n",
    "  \n",
    "    #n = \\tanh(W_{in} x + b_{in} + r * (W_{hn} h + b_{hn})) \n",
    "    dnz = (1-np.square(n))*dn \n",
    "        \n",
    "    Z_hn = np.dot(h_pre,w_hh[:,2*hidden_size:])+b_hh[:,2*hidden_size:]\n",
    "    dr = dnz*Z_hn\n",
    "    dZ_ih_n = dnz\n",
    "    dZ_hh_n = dnz*r\n",
    "                \n",
    "    duz = u*(1-u)*du\n",
    "    dZ_ih_u = duz\n",
    "    dZ_hh_u = duz\n",
    "     \n",
    "    drz = r*(1-r)*dr\n",
    "    dZ_ih_r = drz\n",
    "    dZ_hh_r = drz    \n",
    "    \n",
    "    dZ_ih = np.column_stack((dZ_ih_r,dZ_ih_u,dZ_ih_n))\n",
    "    dZ_hh = np.column_stack((dZ_hh_r,dZ_hh_u,dZ_hh_n))            \n",
    "    \n",
    "    dW_ih = np.dot(x.T,dZ_ih)\n",
    "    dW_hh = np.dot(h_pre.T,dZ_hh)\n",
    "    db_ih = np.sum(dZ_ih, axis=0, keepdims=True) \n",
    "    db_hh = np.sum(dZ_hh, axis=0, keepdims=True)             \n",
    "  \n",
    "    dh_pre+=np.dot(dZ_hh,w_hh.T)\n",
    "    dx =  np.dot(dZ_ih,w_ih.T)\n",
    "    return dx,dh_pre,(dW_ih,dW_hh,db_ih,db_hh)\n",
    "\n",
    "class GRUCell(RNNCellBase):\n",
    "    \"\"\"Single-step GRU cell.\n",
    "\n",
    "        r  = sigmoid(x W_ir + b_ir + h W_hr + b_hr)\n",
    "        z  = sigmoid(x W_iz + b_iz + h W_hz + b_hz)\n",
    "        n  = tanh(x W_in + b_in + r * (h W_hn + b_hn))\n",
    "        h' = (1 - z) * n + z * h\n",
    "\n",
    "    (The update gate z is called `u` in gru_cell / gru_cell_back.)\n",
    "\n",
    "    Inputs: input, hidden\n",
    "        input  -- (batch, input_size)\n",
    "        hidden -- (batch, hidden_size) initial state; zeros if not provided\n",
    "\n",
    "    Outputs: (h', run)\n",
    "        h'  -- (batch, hidden_size) next hidden state\n",
    "        run -- column-stacked gate values (r, u, n); pass it to backward()\n",
    "\n",
    "    Attributes:\n",
    "        W_ih -- input-hidden weights, (input_size, 3*hidden_size)\n",
    "        W_hh -- hidden-hidden weights, (hidden_size, 3*hidden_size)\n",
    "        b_ih -- input-hidden bias, (1, 3*hidden_size), or None without bias\n",
    "        b_hh -- hidden-hidden bias, (1, 3*hidden_size), or None without bias\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_size, hidden_size, bias=True):\n",
    "        super(GRUCell, self).__init__(input_size, hidden_size,bias, num_chunks=3)\n",
    "\n",
    "    def forward(self, input, h=None):\n",
    "        \"\"\"Run one step; `h` defaults to a zero state.\"\"\"\n",
    "        self.check_forward_input(input)\n",
    "        if h is None:\n",
    "            # np.zeros takes the shape as ONE tuple; passing two positional\n",
    "            # integers (as before) raised a TypeError.\n",
    "            h = np.zeros((input.shape[0], self.hidden_size), dtype=input.dtype)\n",
    "        self.check_forward_hidden(input, h, '')\n",
    "        return gru_cell(\n",
    "                input, h,\n",
    "                self.W_ih, self.W_hh,\n",
    "                self.b_ih, self.b_hh,\n",
    "            )\n",
    "\n",
    "    def __call__(self, input, h=None):\n",
    "        return self.forward(input,h)\n",
    "\n",
    "    def backward(self, dh,run,input,h_pre):\n",
    "        \"\"\"Backpropagate through one step and accumulate into self.grads.\n",
    "\n",
    "        dh    -- gradient w.r.t. the step's output h'\n",
    "        run   -- gate values returned by forward for this step\n",
    "        input -- the step's input\n",
    "        h_pre -- the hidden state fed into the step; zeros when None\n",
    "\n",
    "        Returns (dx, dh_pre, grads) with grads = (dW_ih, dW_hh, db_ih, db_hh).\n",
    "        \"\"\"\n",
    "        if h_pre is None:\n",
    "            h_pre = np.zeros((input.shape[0], self.hidden_size), dtype=input.dtype)\n",
    "        dx,dh_pre,grads = gru_cell_back(\n",
    "                            dh,run,\n",
    "                            input, h_pre,\n",
    "                            self.W_ih, self.W_hh,\n",
    "                            self.b_ih, self.b_hh )\n",
    "        # In-place accumulation; without biases self.grads has only two\n",
    "        # entries and zip stops after the two weight gradients.\n",
    "        for a, b in zip(self.grads,grads):\n",
    "            a+=b\n",
    "        return dx,dh_pre,grads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  7.10 多层、双向循环神经网络\n",
    "\n",
    "### 7.10.1 多层循环神经网络\n",
    "\n",
    "可以用前面的循环神经网络单元构建多层循环神经网络，下面的代码在神经网络单元的基础上构建了一个表示多层的循环神经网络的基类RNNBase："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Layers import * \n",
    "class RNNBase(Layer):\n",
    "    \"\"\"Stack of `n_layers` recurrent cells unrolled over a sequence.\n",
    "\n",
    "    `mode` selects the cell type: 'RNN_TANH', 'RNN_RELU', 'LSTM' or 'GRU'.\n",
    "    Layer 0 consumes the input sequence (seq_len, batch, input_size); every\n",
    "    later layer consumes the hidden states of the layer below it.\n",
    "    \"\"\"\n",
    "    def __init__(self,mode,input_size, hidden_size, n_layers,bias = True):\n",
    "        super(RNNBase, self).__init__()\n",
    "        self.mode = mode\n",
    "        if mode == 'RNN_TANH':\n",
    "            make_cell = lambda n_in: RNNCell(n_in, hidden_size,bias,nonlinearity=\"tanh\")\n",
    "        elif mode == 'RNN_RELU':\n",
    "            make_cell = lambda n_in: RNNCell(n_in, hidden_size,bias,nonlinearity=\"relu\")\n",
    "        elif mode == 'LSTM':\n",
    "            make_cell = lambda n_in: LSTMCell(n_in, hidden_size,bias)\n",
    "        elif mode == 'GRU':\n",
    "            make_cell = lambda n_in: GRUCell(n_in, hidden_size,bias)\n",
    "        else:\n",
    "            # An unknown mode used to leave self.cells undefined and fail later.\n",
    "            raise ValueError(\"Unrecognized RNN mode: {}\".format(mode))\n",
    "        # First layer maps input_size -> hidden_size, the rest hidden -> hidden.\n",
    "        self.cells = [make_cell(input_size)]\n",
    "        self.cells += [make_cell(hidden_size) for _ in range(n_layers-1)]\n",
    "\n",
    "        self.input_size, self.hidden_size = input_size,hidden_size\n",
    "        self.n_layers = n_layers\n",
    "        # Caches filled by forward(); backward() tests self.hs against None.\n",
    "        self.hs = None\n",
    "        self.zs = None\n",
    "        self.flatten_parameters()\n",
    "        self._params = None\n",
    "\n",
    "    def flatten_parameters(self):\n",
    "        \"\"\"Collect every cell's parameter and gradient arrays into flat lists, in layer order.\"\"\"\n",
    "        self.params = []\n",
    "        self.grads = []\n",
    "        for i in range(self.n_layers):\n",
    "            rnn = self.cells[i]\n",
    "            for j,p in enumerate(rnn.params):\n",
    "                self.params.append(p)\n",
    "                self.grads.append(rnn.grads[j])\n",
    "\n",
    "    def forward(self, x,h=None):\n",
    "        \"\"\"Run the whole sequence through all layers.\n",
    "\n",
    "        x -- (seq_len, batch, input_size)\n",
    "        h -- initial state: (n_layers, batch, hidden_size), or for LSTM a pair\n",
    "             (h_0, c_0) of such arrays; zeros when None\n",
    "\n",
    "        Returns (output, hn): the last layer's hidden state at every step, and\n",
    "        the last-step slice of the cached states of every layer (for LSTM that\n",
    "        slice holds both h and c). The states and gate values are cached in\n",
    "        self.hs / self.zs for backward().\n",
    "        \"\"\"\n",
    "        seq_len,batch_size = x.shape[0], x.shape[1]\n",
    "        n_layers = self.n_layers\n",
    "        mode = self.mode\n",
    "\n",
    "        hs = [[] for i in range(n_layers)]\n",
    "        zs = [[] for i in range(n_layers)]\n",
    "        if h is None:\n",
    "            h = self.init_hidden(batch_size)\n",
    "        # Kept (not copied) so backward() can find each layer's initial state.\n",
    "        self.h = h\n",
    "\n",
    "        for i in range(n_layers):\n",
    "            cell = self.cells[i]\n",
    "            if i!=0:\n",
    "                x = hs[i-1]  # hidden states of the previous layer\n",
    "                if mode == 'LSTM':\n",
    "                    x = np.array([h_t for h_t,c_t in x])\n",
    "\n",
    "            if mode == 'LSTM':\n",
    "                # h is the pair (h_0, c_0); indexing the pair itself by layer\n",
    "                # (as before) raised IndexError from the third layer on.\n",
    "                hi = (h[0][i],h[1][i])\n",
    "            else:\n",
    "                hi = h[i]\n",
    "            for t in range(seq_len):\n",
    "                hi =  cell(x[t],hi)\n",
    "                if isinstance(hi, tuple):\n",
    "                    # LSTM / GRU cells return (state, gate values).\n",
    "                    hi,z = hi[0],hi[1]\n",
    "                    zs[i].append(z)\n",
    "                hs[i].append(hi)\n",
    "\n",
    "        self.hs = np.array(hs)  #(layer_size,seq_size,batch_size,hidden_size)\n",
    "        if len(zs[0])>0:\n",
    "            self.zs = np.array(zs)\n",
    "        else:self.zs = None\n",
    "\n",
    "        output = hs[-1] # hidden states (`h_t`) of the last layer\n",
    "        if mode == 'LSTM':\n",
    "            output = [h_t for h_t,c_t in output]\n",
    "        hn = self.hs[:,-1,:,:]  # states at `t = seq_len`\n",
    "        return np.array(output),hn\n",
    "\n",
    "    def __call__(self, x,h=None):\n",
    "        return self.forward(x,h)\n",
    "\n",
    "    def init_hidden(self, batch_size):\n",
    "        \"\"\"Create, store in self.h and return a zero initial state.\"\"\"\n",
    "        zeros = np.zeros((self.n_layers, batch_size, self.hidden_size))\n",
    "        if self.mode=='LSTM':\n",
    "            self.h = (zeros,zeros)\n",
    "        else:\n",
    "            self.h = zeros\n",
    "        return self.h\n",
    "\n",
    "    def backward(self,dhs,input):\n",
    "        \"\"\"Backpropagate through time and through the layer stack.\n",
    "\n",
    "        dhs   -- gradient w.r.t. the last layer's outputs: (seq_len, batch, hidden),\n",
    "                 or (batch, hidden) when only the final step has a gradient\n",
    "        input -- the sequence that was given to forward()\n",
    "\n",
    "        Returns (dinput, dhidden): gradients w.r.t. the input sequence and\n",
    "        w.r.t. each layer's initial state. Cell gradients are accumulated in\n",
    "        the cells' own buffers. `dhs` itself is left untouched.\n",
    "        \"\"\"\n",
    "        if self.hs is None:\n",
    "            # No cached forward pass yet. forward() fills self.hs / self.zs /\n",
    "            # self.h itself; assigning its return value to self.hs (as before)\n",
    "            # replaced the cache with the output array.\n",
    "            self.forward(input)\n",
    "        hs = self.hs\n",
    "        zs = self.zs if self.zs is not None else hs\n",
    "        seq_len,batch_size = input.shape[0], input.shape[1]\n",
    "        dinput = [None for i in range(seq_len)]\n",
    "\n",
    "        if len(dhs.shape)==2:  # dh at last time(batch,hidden)\n",
    "            dhs_ = [np.zeros_like(dhs) for i in range(seq_len)]\n",
    "            dhs_[-1] = dhs\n",
    "            dhs = np.array(dhs_)\n",
    "        elif dhs.shape[0]!=seq_len:\n",
    "            raise RuntimeError(\n",
    "                \"dhs has inconsistent seq_len: got {}, expected {}\".format(\n",
    "                    dhs.shape[0], seq_len))\n",
    "        else:\n",
    "            # dhs[t] is overwritten below with the gradient for the next lower\n",
    "            # layer; work on a copy so the caller's array is not modified.\n",
    "            dhs = dhs.copy()\n",
    "\n",
    "        dhidden = [None for i in range(self.n_layers)]\n",
    "        for layer in reversed(range(self.n_layers)):\n",
    "            layer_hs = hs[layer]\n",
    "            layer_zs = zs[layer]\n",
    "            cell = self.cells[layer]\n",
    "            if layer==0:\n",
    "                layer_input = input\n",
    "            elif self.mode =='LSTM':\n",
    "                layer_input = [h_t for h_t,c_t in self.hs[layer-1]]\n",
    "            else:\n",
    "                layer_input = self.hs[layer-1]\n",
    "\n",
    "            dh = np.zeros_like(dhs[0])  # gradient flowing back from step t+1\n",
    "            if self.mode =='LSTM':\n",
    "                # self.h is the pair (h_0, c_0); see the note in forward().\n",
    "                h_0 = (self.h[0][layer],self.h[1][layer])\n",
    "                dc = np.zeros_like(dhs[0])\n",
    "            else:\n",
    "                h_0 = self.h[layer]\n",
    "            for t in reversed(range(seq_len)):\n",
    "                dh += dhs[t]  # gradient from step t+1 plus this step's own gradient\n",
    "                h_pre = h_0 if t==0 else layer_hs[t-1]\n",
    "                if self.mode=='LSTM':\n",
    "                    dhc = (dh,dc)\n",
    "                    dx,dhc,_ = cell.backward(dhc,layer_zs[t],layer_input[t],h_pre)\n",
    "                    dh,dc = dhc\n",
    "                else:\n",
    "                    dx,dh,_ = cell.backward(dh,layer_zs[t],layer_input[t],h_pre)\n",
    "                if layer>0:\n",
    "                    dhs[t] = dx   # becomes the output gradient of the layer below\n",
    "                else :\n",
    "                    dinput[t] = dx\n",
    "                if t==0:\n",
    "                    # Gradient w.r.t. this layer's initial state.\n",
    "                    if self.mode=='LSTM':\n",
    "                        dhidden[layer] = dhc\n",
    "                    else:\n",
    "                        dhidden[layer] = dh\n",
    "        return np.array(dinput),np.array(dhidden)\n",
    "\n",
    "    def parameters(self):\n",
    "        \"\"\"Return a cached list of [parameter, gradient] pairs.\"\"\"\n",
    "        if self._params is None:\n",
    "            self._params = []\n",
    "            for  i, _ in enumerate(self.params):\n",
    "                self._params.append([self.params[i],self.grads[i]])\n",
    "        return self._params"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "在这个基类的基础上，可以实现不同类型的多层循环神经网络，例如下面的类RNN、LSTM、GRU分别实现了多层的简单循环神经网络、LSTM和GRU循环神经网络："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "class RNN(RNNBase):\n",
    "    \"\"\"Multi-layer simple RNN.\n",
    "\n",
    "    Accepts the RNNBase arguments (without `mode`) plus an optional keyword\n",
    "    `nonlinearity`: 'tanh' (default) or 'relu'; anything else raises ValueError.\n",
    "    \"\"\"\n",
    "    def __init__(self,*args, **kwargs):\n",
    "        # Take the option out of kwargs so it is not forwarded to RNNBase.\n",
    "        nonlinearity = kwargs.pop('nonlinearity', 'tanh')\n",
    "        if nonlinearity == 'tanh':\n",
    "            mode = 'RNN_TANH'\n",
    "        elif nonlinearity == 'relu':\n",
    "            mode = 'RNN_RELU'\n",
    "        else:\n",
    "            raise ValueError(\"Unknown nonlinearity '{}'\".format(nonlinearity))\n",
    "        super().__init__(mode, *args, **kwargs)\n",
    "\n",
    "class LSTM(RNNBase):\n",
    "    def __init__(self,*args, **kwargs):        \n",
    "        super(LSTM, self).__init__('LSTM', *args, **kwargs)\n",
    "        \n",
    "class GRU(RNNBase):\n",
    "    def __init__(self,*args, **kwargs):        \n",
    "        super(GRU, self).__init__('GRU', *args, **kwargs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以用下面的代码测试这些多层循环神经网络："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input.shape (6, 3, 5)\n",
      "h_0.shape (2, 3, 8)\n",
      "c_0.shape (2, 3, 8)\n",
      "output.shape (6, 3, 8)\n",
      "output [[[-1.41332952e-01  1.01403706e-01 -2.85916340e-02 -9.87413739e-02\n",
      "    9.33856770e-03  6.02946596e-02  1.81926512e-01 -3.19521866e-01]\n",
      "  [ 1.16079351e-01  3.32220063e-01  6.13840909e-02  4.80895026e-02\n",
      "    2.66551843e-01  2.13127872e-02 -1.83932741e-02  2.46244947e-01]\n",
      "  [-6.38352520e-02  1.31319471e-01 -1.25023626e-01 -1.65318605e-01\n",
      "   -2.73226230e-01 -3.07002126e-01  3.00010024e-03  3.13673563e-01]]\n",
      "\n",
      " [[-1.73038422e-01  1.31192289e-02  3.03686087e-02 -8.07275142e-02\n",
      "    1.63274056e-01  8.71913480e-02  1.02719109e-02 -2.85726641e-01]\n",
      "  [-8.93547979e-02  2.28040942e-01  1.36844426e-01  8.24556301e-02\n",
      "    3.26063109e-01  6.35332440e-02  3.03777141e-04 -5.60717166e-03]\n",
      "  [-1.79679229e-01  1.87130354e-01 -1.31782501e-01  1.08536411e-02\n",
      "   -1.01416870e-02 -1.13561668e-01  9.82197446e-03  2.02974940e-01]]\n",
      "\n",
      " [[-1.98514725e-01  6.29468711e-03  7.55194285e-02 -1.20916418e-02\n",
      "    2.63115727e-01  9.14563312e-02 -2.49205445e-02 -2.25903885e-01]\n",
      "  [-1.74012049e-01  8.58055294e-02  1.61814488e-01  7.63295391e-02\n",
      "    3.30887749e-01  7.54041027e-02 -5.19134411e-03 -1.20295169e-01]\n",
      "  [-2.14429521e-01  1.15476652e-01 -7.46819687e-02  7.90751963e-02\n",
      "    1.13905002e-01 -3.07949194e-02 -5.94549980e-03  3.41307453e-02]]\n",
      "\n",
      " [[-2.18822742e-01 -1.17900297e-02  5.64256932e-02  6.72026637e-02\n",
      "    2.69448784e-01  7.94663225e-02 -4.49694090e-02 -2.13955227e-01]\n",
      "  [-2.11523721e-01  3.34232404e-02  1.68715402e-01  9.58796009e-02\n",
      "    3.34953355e-01  7.03128389e-02 -7.44313038e-05 -1.70653685e-01]\n",
      "  [-2.36888973e-01  5.89951549e-02 -5.43605914e-02  1.43248809e-01\n",
      "    1.76261559e-01  5.09680500e-03 -1.16623079e-02 -6.15580537e-02]]\n",
      "\n",
      " [[-2.31250034e-01 -1.41271533e-02  6.03347644e-02  1.13391079e-01\n",
      "    2.94025901e-01  6.82063746e-02 -3.73624994e-02 -2.05344178e-01]\n",
      "  [-2.21608983e-01 -5.17014142e-03  1.50915088e-01  1.06572322e-01\n",
      "    3.23104663e-01  6.05834754e-02 -1.52396152e-02 -1.93866644e-01]\n",
      "  [-2.44174725e-01  3.16578889e-02 -3.74989622e-02  1.75250718e-01\n",
      "    2.11648022e-01  1.07629315e-02 -1.88288895e-02 -1.12641486e-01]]\n",
      "\n",
      " [[-2.54583297e-01 -4.74430159e-03  6.82104685e-02  1.22602260e-01\n",
      "    3.12361336e-01  5.79044388e-02 -3.30529915e-02 -1.82420982e-01]\n",
      "  [-2.30894274e-01 -2.04957184e-02  1.45215764e-01  9.06556985e-02\n",
      "    3.31000539e-01  5.90061328e-02 -2.51147617e-02 -1.89354873e-01]\n",
      "  [-2.81921217e-01  3.08401690e-02 -2.68353637e-02  1.42356918e-01\n",
      "    2.54187772e-01  2.25654343e-02 -7.32796192e-03 -1.09110074e-01]]]\n",
      "hn [[[[ 0.09645903  0.04969926  0.37169596 -0.14119736  0.3287604\n",
      "    -0.1250236   0.22541772 -0.07196655]\n",
      "   [ 0.16603822  0.14429172  0.20019071 -0.12523335  0.29628868\n",
      "     0.21202227  0.13106821 -0.02808015]\n",
      "   [-0.12050428 -0.13985964  0.58623291 -0.17074305 -0.23311151\n",
      "    -0.28519532  0.22746292 -0.18068336]]\n",
      "\n",
      "  [[ 0.24488731  0.11305883  0.62084169 -0.37942143  0.44604925\n",
      "    -0.23343854  0.53975388 -0.12627864]\n",
      "   [ 0.37166115  0.27236822  0.37262794 -0.50381242  0.44055602\n",
      "     0.34815519  0.39526646 -0.05167814]\n",
      "   [-0.19497399 -0.36481165  0.80692177 -0.2547238  -0.50870011\n",
      "    -0.77412778  0.39446254 -0.52366144]]]\n",
      "\n",
      "\n",
      " [[[-0.2545833  -0.0047443   0.06821047  0.12260226  0.31236134\n",
      "     0.05790444 -0.03305299 -0.18242098]\n",
      "   [-0.23089427 -0.02049572  0.14521576  0.0906557   0.33100054\n",
      "     0.05900613 -0.02511476 -0.18935487]\n",
      "   [-0.28192122  0.03084017 -0.02683536  0.14235692  0.25418777\n",
      "     0.02256543 -0.00732796 -0.10911007]]\n",
      "\n",
      "  [[-0.47205098 -0.01212039  0.1794584   0.19875715  0.63503604\n",
      "     0.13478924 -0.05923049 -0.32697245]\n",
      "   [-0.43786722 -0.04943713  0.39691116  0.15679702  0.65199092\n",
      "     0.1456739  -0.0433396  -0.33724216]\n",
      "   [-0.57893445  0.08227842 -0.07455036  0.20376939  0.53587268\n",
      "     0.04590336 -0.01334409 -0.18276796]]]]\n",
      "dinput.shape: (6, 3, 5)\n",
      "dinput: [[[-3.05724316e-02  3.95340786e-03  7.66018796e-04 -3.90778155e-02\n",
      "    4.77033039e-02]\n",
      "  [ 2.02112289e-02 -1.93243434e-02  1.80640962e-02 -1.79143059e-02\n",
      "   -3.78011102e-02]\n",
      "  [-1.46870749e-02 -6.61252840e-03  1.72205826e-03 -1.23474989e-02\n",
      "   -3.47530763e-02]]\n",
      "\n",
      " [[-3.77989901e-02  1.57486148e-02 -2.01571917e-02 -3.09397909e-02\n",
      "    4.64715150e-02]\n",
      "  [ 1.59308978e-02 -1.80281694e-02  1.55637298e-02 -1.40080877e-02\n",
      "    5.01858980e-04]\n",
      "  [ 1.23932625e-02  2.56878981e-02 -2.36552869e-02 -1.94397356e-04\n",
      "   -1.19059471e-02]]\n",
      "\n",
      " [[-2.31335353e-02  2.22115062e-02  4.65019110e-04 -2.90895805e-02\n",
      "    6.41608183e-02]\n",
      "  [ 1.39058135e-02 -2.47179503e-02  2.42928234e-02 -3.09275445e-02\n",
      "    2.73211669e-02]\n",
      "  [-1.02014458e-02  2.72877561e-02 -1.20315702e-02  4.68807180e-03\n",
      "    3.53646331e-03]]\n",
      "\n",
      " [[-3.40038387e-02  5.41998467e-02 -1.65087883e-02 -2.69517195e-02\n",
      "    3.40499874e-02]\n",
      "  [-2.45905730e-02 -4.37383984e-03 -3.72624879e-03 -3.58383725e-02\n",
      "    2.41563060e-02]\n",
      "  [-9.20556186e-03  3.16019929e-02 -4.25767873e-02 -5.63197303e-05\n",
      "   -2.25173901e-02]]\n",
      "\n",
      " [[-2.69704222e-02  1.35870725e-02 -2.23332467e-02 -5.16818238e-02\n",
      "    2.43392369e-02]\n",
      "  [ 7.56591682e-04 -2.50561314e-02  2.56696546e-02 -1.55584617e-02\n",
      "   -5.11610331e-03]\n",
      "  [-1.79085339e-02 -1.89453382e-02  1.12750879e-02  9.39168521e-03\n",
      "   -4.98088836e-02]]\n",
      "\n",
      " [[-1.65909057e-02  1.32470445e-02 -2.05614034e-02 -1.72013748e-02\n",
      "   -1.38522114e-03]\n",
      "  [-4.45504453e-03 -5.29148696e-03  5.59126899e-03 -3.64788982e-03\n",
      "    7.14069734e-03]\n",
      "  [-3.91099199e-02  4.02649831e-03 -1.92630209e-02  1.66485122e-02\n",
      "   -2.26481421e-02]]]\n",
      "dhidden: [[[[ 4.16122396e-02 -1.07007176e-02 -9.69533240e-03 -2.47613485e-02\n",
      "     5.40732942e-04 -1.04418357e-02 -3.27476600e-03  4.91882991e-04]\n",
      "   [-7.82127765e-03  1.53033691e-02  8.48431933e-03  1.49939928e-02\n",
      "    -1.89639278e-02  2.19290779e-03 -1.65035479e-03 -9.58252008e-03]\n",
      "   [ 1.79076946e-02  3.47038640e-02 -2.95481308e-03  2.34217102e-04\n",
      "    -2.42610915e-03 -3.91801167e-02 -1.86080318e-03  1.53085468e-02]]\n",
      "\n",
      "  [[-2.23206480e-02 -2.64329909e-02  1.15344046e-03 -2.66880551e-02\n",
      "     5.26493431e-04 -7.52089094e-02 -3.88463290e-03  4.04954909e-03]\n",
      "   [ 2.63176481e-02  3.09893495e-02  2.68454165e-02  6.96664634e-03\n",
      "     2.05050313e-02  2.21679327e-02  1.92271304e-03  2.77727669e-02]\n",
      "   [ 3.46178722e-02  7.66792936e-03  4.84681212e-02  5.26726043e-03\n",
      "    -7.52977082e-02 -1.92551343e-02 -2.89106877e-02 -6.32467473e-02]]]\n",
      "\n",
      "\n",
      " [[[ 1.22778396e-01 -1.43327236e-01 -2.41624653e-02  3.23142204e-02\n",
      "    -1.10995269e-01  7.95120090e-02 -6.40622077e-02  1.22640495e-01]\n",
      "   [-6.30538295e-02  1.04246004e-01 -8.11837983e-02  2.30229004e-02\n",
      "    -4.81679325e-02  7.10759185e-02 -1.62665013e-01 -1.40321129e-01]\n",
      "   [-3.19695693e-01  9.21002653e-02 -2.48428043e-01 -1.48841358e-01\n",
      "    -3.87719412e-02 -1.43423807e-01  2.55663044e-02  4.08578161e-02]]\n",
      "\n",
      "  [[ 1.75977727e-03  5.08437772e-01 -4.83143354e-01 -2.41983135e-02\n",
      "     9.16293573e-03  2.37729503e-02  2.50141613e-01  1.96956501e-01]\n",
      "   [ 1.71731932e-01  7.42050682e-02  6.74040121e-01 -1.12127051e-01\n",
      "    -3.95439169e-02  8.26141106e-03 -9.41731325e-01  3.38355311e-02]\n",
      "   [ 1.68743583e-01 -1.26725720e-01 -5.73708141e-01 -7.65622735e-01\n",
      "    -2.20044706e-01  2.03210773e-01  5.63961467e-01  4.28330547e-01]]]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "#from rnn import *\n",
    "np.random.seed(1)\n",
    "\n",
    "num_layers= 2\n",
    "batch_size,input_size,hidden_size= 3,5,8\n",
    "seg_len = 6\n",
    " \n",
    "test_RNN = \"LSTM\"\n",
    "\n",
    "if test_RNN == \"rnnTANH\":\n",
    "    rnn = RNN(input_size,hidden_size,num_layers )\n",
    "elif test_RNN == \"rnnRELU\":\n",
    "    rnn = RNN(input_size,hidden_size, num_layers,nonlinearity= 'relu')\n",
    "elif test_RNN == \"GRU\":\n",
    "    rnn = GRU(input_size,hidden_size, num_layers)\n",
    "elif test_RNN == \"LSTM\":\n",
    "    rnn = LSTM(input_size,hidden_size, num_layers)\n",
    "    c_0 = np.random.randn(num_layers, batch_size, hidden_size)\n",
    "    \n",
    "input = np.random.randn(seg_len, batch_size, input_size)\n",
    "h_0 = np.random.randn(num_layers, batch_size, hidden_size)\n",
    "\n",
    "print(\"input.shape\",input.shape)\n",
    "print(\"h_0.shape\",h_0.shape)\n",
    "print(\"c_0.shape\",c_0.shape)\n",
    "\n",
    "if test_RNN == \"LSTM\":   \n",
    "    output, hn = rnn(input, (h_0,c_0))\n",
    "else:\n",
    "    output, hn = rnn(input, h_0)\n",
    "\n",
    "print(\"output.shape\",output.shape)\n",
    "print(\"output\",output)\n",
    "print(\"hn\",hn)\n",
    "\n",
    "#------test backward---    \n",
    "do = np.random.randn(*output.shape)\n",
    "dinput,dhidden = rnn.backward(do,input)#,rnn.hs)#output)\n",
    "print(\"dinput.shape:\",dinput.shape)\n",
    "print(\"dinput:\",dinput)\n",
    "print(\"dhidden:\",dhidden)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "上面的多层LSTM循环神经网络的每个隐含层的隐状态大小都是一样的，为了使这个多层神经网络适应不同输出值大小的问题，可以在多层LSTM循环神经单元的基础上，再增加一个全连接的输出层，以输出针对不同输出向量大小的特定问题。下面的LSTM_RNN就是一个这样的多层循环神经网络，其中input_size, hidden_size, output_size分别是输入数据大小、隐状态大小和输出值大小，num_layers则是循环神经网络的层数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Layers import *\n",
    "class  LSTM_RNN(object):\n",
    "    def __init__(self, input_size, hidden_size, output_size,num_layers):\n",
    "        super(LSTM_RNN, self).__init__()\n",
    "        self.input_size = input_size\n",
    "        self.hidden_size = hidden_size       \n",
    "        self.num_layers = num_layers\n",
    "\n",
    "        # Define the LSTM layer\n",
    "        self.lstm = LSTM(input_size,hidden_size,num_layers)\n",
    "\n",
    "        # Define the output layer\n",
    "        self.linear = Dense(hidden_size, output_size)\n",
    "        self.layers = [self.lstm,self.linear]\n",
    "        self._params = None\n",
    "\n",
    "    def init_hidden(self,batch_size):\n",
    "        # This is what we'll initialise our hidden state as\n",
    "        self.h_0 =  (np.zeros((self.num_layers, batch_size, self.hidden_size)),\n",
    "                np.zeros((self.num_layers, batch_size, self.hidden_size)))\n",
    "\n",
    "    def forward(self, input):\n",
    "        # input:(seq_len, batch, input_size)\n",
    "        # shape of hs_out: [input_size, batch_size, hidden_dim]\n",
    "        # shape of self.h_0: (a, b), where a and b both \n",
    "        # have shape (num_layers, batch_size, hidden_dim).\n",
    "      \n",
    "        hs_out, self.h_0 = self.lstm(input,self.h_0)\n",
    "      \n",
    "        batch_size = input.shape[1]\n",
    "        y_pred = self.linear(hs_out[-1].reshape(batch_size, -1))\n",
    "        return y_pred#.reshape(batch_size, -1)#.flatten() #view(-1)\n",
    "    \n",
    "    def __call__(self, input):\n",
    "        return self.forward(input)\n",
    "    \n",
    "    def backward(self,dZs,input):\n",
    "        dhs = self.linear.backward(dZs)\n",
    "        dinput = self.lstm.backward(dhs,input)          \n",
    "\n",
    "    def parameters(self):\n",
    "        if self._params is None:\n",
    "            self._params = []\n",
    "            for layer in self.layers:\n",
    "                for  i, _ in enumerate(layer.params):  \n",
    "                    self._params.append([layer.params[i],layer.grads[i]])  \n",
    "        return self._params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(20, 80, 1)\n",
      "(20, 80, 1)\n",
      "(1, 80, 20)\n",
      "Epoch  0 MSE:  0.03588440890477615\n",
      "Epoch  100 MSE:  0.010732580011911729\n",
      "Epoch  200 MSE:  0.010387283087887647\n",
      "Epoch  300 MSE:  0.010223361294028982\n",
      "Epoch  400 MSE:  0.010150461486744412\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOy9eXxjd33v/f5psRYvktfxPqtnMvuayZ4QspAEaOACLSm30AuU5nLT0lvgNrRPS+lyobc8DVygpIHSBp6maQhbEkKTkH2DZJLZV3tmPDPyvsqSLFnb7/njd44k25ItjyVLGp/36zUvSUfH5xxpjj7ne76rkFJiYGBgYHDpYyr0ARgYGBgYLA2G4BsYGBgsEwzBNzAwMFgmGIJvYGBgsEwwBN/AwMBgmWAp9AHMRV1dnVy1alWhD8PAwMCgZHjrrbeGpZT16d4rasFftWoV+/btK/RhGBgYGJQMQohzmd4zXDoGBgYGywRD8A0MDAyWCYbgGxgYGCwTitqHb2BgYDAfkUgEj8dDKBQq9KEsKXa7ndbWVqxWa9Z/Ywi+gYFBSePxeKisrGTVqlUIIQp9OEuClJKRkRE8Hg+rV6/O+u8Ml46BgUFJEwqFqK2tXTZiDyCEoLa2dsF3NYbgGxgYlDzLSex1LuYzG4K/ULpfhcHjhT4KAwMDgwWTE8EXQtwmhDgphOgSQtw7x3qXCyFiQogP5mK/BeGJP4Ln/qbQR2FgYFBEmM1mduzYwZYtW/jQhz7E5OTkRW/rd3/3d3n00UdzeHRJFi34Qggz8C3gdmATcJcQYlOG9f4OeGqx+ywo4QD4Bwp9FAYGBkWEw+HgwIEDHDlyhLKyMu6///5p78disQId2XRyYeHvBbqklGeklGHgYeDONOv9AfAjYDAH+ywckSD4S/sjGBgY5I/rrruOrq4uXnjhBW688UZ++7d/m61btxKLxfj85z/P5ZdfzrZt2/inf/onQGXc3HPPPWzatIl3v/vdDA4m9eXee+9l06ZNbNu2jc997nOLPrZcpGW2ABdSXnuAK1JXEEK0AO8H3glcPtfGhBCfAj4F0N7enoPDyzHRkPpnYGBQdHzp8aMc653I6TY3NVfxxfduzmrdaDTKL37xC2677TYA3njjDY4cOcLq1at54IEHcLlcvPnmm0xNTXHNNddw6623sn//fk6ePMnhw4cZGBhg06ZNfPzjH2d0dJSf/OQnnDhxAiEE4+Pji/4subDw04WKZw7K/RrwJ1LKee9rpJQPSCn3SCn31NenbfhWOKRUFn5kEqb8hT4aAwODIiEYDLJjxw727NlDe3s7n/jEJwDYu3dvIk/+6aef5vvf/z47duzgiiuuYGRkhM7OTl566SXuuusuzGYzzc3NvPOd7wSgqqoKu93OJz/5SX784x/jdDoXfZy5sPA9QFvK61agd8Y6e4CHtTSiOuAOIURUSvnTHOx/6YiFSVzLAoNgqyjo4RgYGEwnW0s81+g+/JmUl5cnnksp+cY3vsG73vWuaes8+eSTaVMsLRYLb7zxBs8++ywPP/ww3/zmN3nuuecWdZy5sPDfBDqEEKuFEGXAh4HHUleQUq6WUq6SUq4CHgU+XXJiD9NdOf6hwh2HgYFByfGud72Lb3/720QiEQBOnTpFIBDg+uuv5+GHHyYWi9HX18fzzz8PgN/vx+v1cscdd/C1r30t7QVloSzawpdSRoUQ96Cyb8zA96SUR4UQd2vv3z/nBkqJSIrgB4zArYGBQfZ88pOfpLu7m127diGlpL6+np/+9Ke8//3v57nnnmPr1q2sX7+eG264AQCfz8edd95JKBRCSsl999236GMQUs50txcPe/bskUU1AGWsG76+XT1/9z/A5Z8o6OEYGBjA8ePH2bhxY6EPoyCk++xCiLeklHvSrW9U2i6EaRa+4dIxMDAoLQzBXwjRYPK5kYtvYGBQYhiCvxCiU8nnhg/f
wMCgxDAEfyFENAvfXGZk6RgYGJQchuAvBD0t09VqWPgGBgYlhyH4C0G38N3thoVvYGBQchiCvxB0C9+9EsK+5AXAwMBgWaO3R968eTPbt2/nH/7hH4jH43P+TXd3Nw899NASHaHCEPyFkGrhg5GpY2BgACRbKxw9epRnnnmGJ598ki996Utz/o0h+MWOnqXjXqkejVx8AwODGTQ0NPDAAw/wzW9+Eykl3d3dXHfddezatYtdu3bx2muvAar18csvv8yOHTu47777Mq6XS3LRPG35EJ1p4RuDUAwMiopf3Av9h3O7zcatcPtXFvQna9asIR6PMzg4SENDA8888wx2u53Ozk7uuusu9u3bx1e+8hW++tWv8sQTTwAwOTmZdr1cYgj+QoiEAAGuFvXacOkYGBhkQG9bE4lEuOeeezhw4ABms5lTp06lXT/b9RaDIfgLIRoEix3KtT79hkvHwKC4WKAlni/OnDmD2WymoaGBL33pS6xYsYKDBw8Sj8ex2+1p/+a+++7Lar3FYPjwF0IkBFY7WGxgd81v4UsJw11Lc2wGBgZFwdDQEHfffTf33HMPQgi8Xi9NTU2YTCZ+8IMfJObbVlZW4vP5En+Xab1cYgj+QogGweJQz8sb5i++6n4Zvrkbet7O/7EZGBgUDH3i1ebNm7n55pu59dZb+eIXvwjApz/9aR588EGuvPJKTp06lRiKsm3bNiwWC9u3b+e+++7LuF4uMVw6CyE6pSx8gIqG+Yuv+g6pxwu/hpZd+T02AwODgjGXNd7R0cGhQ4cSr7/85S8DYLVaefbZZ6etm269XGJY+Ashkmrh189v4Y90qsfexU+qMTAwMFgshuAvhGhI+e8hOwt/5LR67DME38DAoPDkRPCFELcJIU4KIbqEEPemef9OIcQhIcQBIcQ+IcS1udjvkhMJgVWz8CsaYMo7fSjKTIY1C3/4FIQD+T8+A4NlSjFP7ssXF/OZFy34Qggz8C3gdmATcJcQYtOM1Z4FtkspdwAfB7672P0WBD0tE1TQFjKnZoYmwN8PbVeAjOe+GMTAwAAAu93OyMjIshJ9KSUjIyMLTt3MRdB2L9AlpTwDIIR4GLgTOJZycP6U9cuB0vyfiU5Nt/BB+fHdbbPXHdXcOVs/pIK2vQeg/cqlOU4Dg2VEa2srHo+HoaHlVRdjt9tpbW1d0N/kQvBbgAsprz3AFTNXEkK8H/gy0AC8Owf7XXoiaSz8TH58Pf9+1bVQscLw4xsY5Amr1crq1asLfRglQS58+CLNslkWvJTyJ1LKy4D3AX+dcWNCfErz8+8ruit2NJSSlqlX22bI1BnpBATUrIGmHdC7f0kO0cDAwCATuRB8D5Dq02gFejOtLKV8CVgrhKjL8P4DUso9Uso99fX1OTi8HJLWws8k+F2qyZrFBs07jMCtgYFBwcmF4L8JdAghVgshyoAPA4+lriCEWCeEENrzXUAZMJKDfS8t0VBS8K12sFVlDtoOd0Jdh3revNMI3BoYGBScRQu+lDIK3AM8BRwHHpFSHhVC3C2EuFtb7QPAESHEAVRGz2/JUgupS6ksfD1oC6r4Kp2FL6XKwa/VBL9ph3o0CrAMDAwKSE5aK0gpnwSenLHs/pTnfwf8XS72VTBiEUAmLXzQiq/SCL6vDyIBqF2rXlc1XVqB2ykfHPoP2P1xMBm1ewYGpYLxa80WffjJTAs/XdBWL7jSXTqgBW4vEcE//jj8/LNGINrAoMQwBD9b9IpavbUCZLbwR7SUzNoUwW/eAcMnL43Arf6Z9V5BBgYGJYEh+NmiW/iWFAu/YgWExiEanr7uSBdYnVDZlFzWtEML3B7J/7Hmm8lh9ThsCL6BQSlhCH626Ba+NcWHn2ny1XCn8t+n+rebtcDtpeDHD2gJVsO5H8FmYGCQPwzBz5a0Fn5Ke4VURrqgdt30ZZVNKnf/UvDj6xb+iDHNy8CglDAEP1uiU+pxmoWfpr1CdArGz0333wMIoaz8S8LC1wX/NMRz
P4bN4BLm0U/Azz9X6KNYthiCny2RdBZ+mvYKY93KV183Q/BB+fGHTiS3VapMDoMwQ2wKxs8X+mgMSolzr8Lxx1StisGSYwh+tkTTZOnoFr7Xk1ymBzL1HPxUKhvVxSDkzc8xLhWBEWjarp4bbh2DbIlFwNcP/gHDUCgQhuBnSyRNHn6ZE9qvhte+meyOqacqznTpgMrcAYhM5u848014UhWVtV+lXhuZOgbZMtFLoq/ihTcKeijLFUPws0Wz8J8/7ePN7tHk8g98B8xWeOSjSgxHulS6pr1q9jb0i8VcU7KKHT1gW78B7O60mTpvnRvjsYMZ++cZAD3jQb778pllNbSDiZ7kc8/8gh+KxPj6LzuZDEfzeFDLC0Pws0Wz8P/+uXN89amTyeWuViX6g8dU9elwmgwdnYSFX8I+fD1gW16n4hRpXDrffuE0f/rjw8TimpgFRiA4voQHWfw8us/D3/z8OKeHLoFCvGzRXZ+VzWoo0Dy8fnqE+355iqePDuT5wJYPhuBni5al4/FJDnm8RGPx5HvrboYb/gQOPqQsl4yCr2X4lLJLZ1LLwS+vV26rNC4dz9gk/qkoXYN+FZz7/p3w2B8s8YEWN54xdQ7sPz9W4CNZQrzanKTN71cFiPNUnS/L7yjPGIKfLVoefogygpEYJwd809+/4X/BmhszZ+hA0sKPlrBLR7fwnbXqc/r71fxeDSklPWPqu9p/fgz6D8HAYRg9W4ijLVp6RgNU4Wf/hWV05+PtAUcNrLkBZAx63p5zdY9+Hi2n7yjPGIKfLZEQEkFYazC6//yMk9Bkhg98V1kvHe9Kv42ED7+ULfwZLh2Y1lNnIhjFN6V8rgcujMOhR9QbfuO2PMGUnz8c+DNet/0BZ7u7C300S4fXA64WaL1cvZ7Hj68L/rHeCUIRo94jFxiCny3RIDGTDRCUmU1KzGZSXgcf+leoX59+GwnBL3Efvsmqhr/omUjDST++Z1xdzMrMJg6eG4HDP9T+bghiRvANXz/yX+9gb2w/5WKKlcMvLp+gpNcDrjZw1kDd+nkzdTzjQcrMJqJxydHeEk9lLhIMwc+WSIiIqQyzSXD1utqL8ytaLgELPzCsLmxCQM1qVYCVYuHrVtn16+upG/61suxX3wDI5N3BcmXwBHz3FuRwJ5+IfI4hSyM3m/ZxyLNMxGzCo5IcAFr3KsGfI0upZ2yS69erSaiz7qgNLgpD8LMlGmKKMppcdi5fVcPpoQDeycjCtnGppGU6tXHEFhtUr5yWmqkL/nu3N3Gn+VWi1krY9VH1pq9/qY+2eBg/D9+7FaIhjr3rYZ6P72Ri5a1cZzrC4TPLIIU1NKEKDqta1Ou2vRAcVe050hAMxxj2h9nZXk2L22EIfo4wBD9boiGCsozWagc72twAHPAs8CS8FAqvdAtfp7ZjuktnbJLyMjPvWF3BbaY3OFX7TnCvVG9mGvi+HDj/ayV4dz3MKZOqwnZs/Q1sIkKs85kCH9wSoOfg6xZ+2171mCE9s0dzDbZWO9jZ7k7vQjVYMDkRfCHEbUKIk0KILiHEvWne/4gQ4pD27zUhxPZc7HdJiQQJxC20VjvZ1upCCDiwUKvDbAVhKm0f/uQMwa/rgNFkEzXPWJDWaieu889QIUL8nOuSXUX9y9jC9/Wpx7qOxF1Qzcbr8ZuraB98/tIvwPLqgt+mHus2gM2VMXB7QfuOlOBX0zMeZGCihO+Mi4RFC74QwowaTH47sAm4SwixacZqZ4EbpJTbgL8GHljsfpeaeCRIIGahtdpBpd3K+oZK9l9YoB9fCGXll7LgB0aSLh1Qgh8NJXKsleA74NAjjFvq+Y/BdmRC8LPI1PEPwfduSwrEpYJ/AKzlYKvEMzZJQ6UNu81G/4p3cE18H72jvvm3UcroOfguzaVjMkHb5RkDt56E4DvZ2a7uqA23zuLJhYW/F+iSUp6RUoaBh4E7U1eQUr4mpdTV8VdAaw72u6SEQ5OEKKO1Wrlldra72X9+fOGW
mdWR7K1fakSnIOyD8trkshmZOp6xSdZXhOD0s/S0vYfhySgenwS7C3xZCH7/QTj/OvTsy8MHKCC+PqhcAULQM65dFAHL5vfiEpNc2P/LAh9gnvF6VIC/ojG5rHUvDB5P20zQMzZJmdlEfYWNTU1VWM3CcOvkgFwIfgtwIeW1R1uWiU8Av8j0phDiU0KIfUKIfUNDQ5lWW3IioUlCsowWt/qh7mx34w1GODu8wNJ4q6N0LfxE0dUMCx9gpBNvMIIvFOW6qechHsW2+y4A3j4/pn7o2Vj4k2PT93Wp4OtPjLz0jAVp0QyH5l13EJRlmE/9vJBHl38metTnN1uSy9r2AhI8sy/unrEgzW47JpPAbjWzqdllVNzmgFwIvkizLK3ZK4S4ESX4f5JpY1LKB6SUe6SUe+rr63NweLkhFg5qFr4S/B1t1cBF3GZaHKUbtNVHOab68MvrlfU+3IlnNMDvmv+TK7u+Dm1XsGrj5TisZvUdVTRkJ/hB7Uett3C4VPD1QWUjsbikN8XCL3NUcNi+m9XDL1zaPeK9KSmZOi27VUzL8+as1fVYkM7ONvfsliYGCyYXgu8B2lJetwKz8syEENuA7wJ3SilL7tccjwQTaZkA6xoqqLBZFn6bWcoW/mQaC18I5dbpP0T1U/fwl9bv42t/J3zkh1jMJra2ulRpfGWWFn5Q60R6KVn4Uip3VmUTg74QkZhMCD5Af9NN1MWHiXj2F/Ag80w6wbdXqWVpUjN7xoLTvqOd7e70LU0MFkQuBP9NoEMIsVoIUQZ8GHgsdQUhRDvwY+B3pJQlOflaREOYyuxYzOorM5sE29tcCw/clnLQNpDSOC2Vug7wvEnT+Sf4+8hvEvvQD5TVj/qhHu+dIOqsV6I3nxWbsPAvIcGf8qkZAhUrpgUjdeyb301MCkbf/kmhjjC/xOPKpeNK4+l1tc8ahhKKxBj2T00XfO2O2vDjL45FC76UMgrcAzwFHAcekVIeFULcLYS4W1vtL4Ba4B+FEAeEECUXkTPHprDanNOW7Wyr5nifj2B4AX0+LgULPzVoC6ppXGUTD637f/kX8weoLk9OBdvZVk04Fqc/7lbB6ql5LLTgJejD1wvOKpsSHSBTxWxLx2relJfhPPHjS+tz6wSGIBZOpmSm4m5PZvBopLsottU4qC0vMzJ1FklO8vCllE9KKddLKddKKf9WW3a/lPJ+7fknpZTVUsod2r89udjvUmKVU9js5dOW7WhzE4tLDvcsoDS+lAU/MAwmixp8ksr234LPnuDF+HZaqx0IkQzr6Cl1XZPadzefW2dSc+lcSj58PQe/shHPqPq/14P/AE0uO/9m/SC20CB850YYOFaIo8wfE1of/JkuHQB3m/p+ouHEonQXRSGElhlnBG4Xg1FpmwXhSAybDGN3zhD8RH7wAk5CawkHbSeHVVtkkS5OPzvQBrCiyk6zy84hr2b1zyf4l6KFr3/myiZ6xoPUV9qwW82Jt4UQhFe9gz+w/Y1Kff3nW+HU0wU62DygDz6pSufSaVMtxVOmYaWz8EEZWBfV0sQggSH4WdA/5sMkJOXlFdOW11XYaK9xLuw20+oo3X74geHpAdsZeMYmp1muOjvbq3lj0KpezCv4KRb+pZK1krDwlQ8/03f01HgrYx95CmpWwb//Fuz/t6U9znzhncvCb9fWSbp1PGNBrGZBQ6Vt2qo72zU//kJbmhgkMAQ/C/qGldVZUVE5671d7W7ePj+WfQGW1Vm6Fn5geLb/XsMbjDARik67DdfZ2e7m8IS2fL7iq+AYICAeSVuQU5L4+qGsIlFlm+472qWJ2dvjTvj4U9CwGd4ouYL09Hh71HnvqJ79nlvz66cEbj1jkzS7HZhM0+8kt7e5EcKYgLUYDMHPgoERZVG4KmcL/s72agZ9U/R5s7TaLfbS9eFPZrbwezLchoP6jryUEzdZ57bw43E1+1YXgUvFj6/l4MfjUquynf0dbW1xYTYJdbdYVg4rNifjGaWO94Ky
7tO5AqtaAQHj0y38dBfFCpuFDSsqjcDtIjAEPwsGx5RFUVWVTvCVH//tbK0Oq1NlLMRLcIJPYGR2SqZGukCbzubmKqxmEz5L7dyCHxoHpBqOAZeOHz+Rgz81Kwdfx1FmZmNTZfI8ctYm3VulTrocfB1LmarAneHSaXXPvigCicBtPH6JuPuWmGUv+L8+M8JtX3uJiVDmQNDIuJrZaimb/UPd2FSFzWLK3uoowalXQ74pbvn7Z2DKO73KNoWe8WR3w5nYrWY2N7sYlK65BV8P2Or9eUooF19KyQe+/Ro/O5Cm6ZuvT8vBz3xRBOXWOXhhnFhcgrMawv7Snp2gM9GTCNj+yaOH+MovTkx/392WcOmky8FPZWd7NROhKGcW2tLEADAEn/0XxjnR7+NXpzO7D0a92pBuy+yT0Go2sa11AX0+SlDwTw348I5oQu1M78P3jAVxWM3UlJelfX9nu5tz4UrkXENQdMHX+/OUkIXvm4ry1rkxfrJ/huBLqfXRacyYfaKzs91NIByjc9CX/J5L3cqPTqmLvKuNWFzyxKFeHn3rwvSYlztZfJUwHGoyXRQvIjPOIMGyF/zRgMr/fW0Owff6tGIhqz3t+zvbqznSO8FUNAs3TQkOMh8JhKkV2kUvg4WvByNFhpTNne3VDMRcxCaysPDrSs/CH/Wr8+iNs6NEUvu9hLyq4ExLyYTMFv7O1P5MuuCXuh8/MfikhbPDfgLaJKtTA/7kOq42tV48Nu9FcU1dBVV2C28bfvyLwhB8TfBfzyD4kVicgF8TfEt6wd/V7iYcjXOsd2L+HeqCX0KpmWMpgj9pTZNpgd4BMr2QgfqOBqUbS2gEYhncZ7q4VTar3vGB0gnajk6q82gyHONQatpgIge/Ec/YJHUVZdNy8FNZWeukpryMt8+NgaNGLSz1wLU3OekqdXbva6dTLubudohHwdeXcHulS10FMJkEO9qrDQv/IjEEXxP8kwM+hnxTs97v94YoQ6sCzCD4en5wVn78EhxzOBIIU4MS/MNjlrTrZMqs0GlxOwjatLuDQIa217qF76hW6Z8laOEDvNaVItKpVbYpbZHTIYRgZ5tbNZtLWPilLvh6Dn4bhzxeHFYzrdWO6XfUidTMC3jGglhMghVV6X9roDpnnhrw4Z+K5vHAL02WveCPBMIJv/Ovzsz+cV0Ym8SOZpFa0wuaXk26P5vGTvpFo4R8+KOBKZqtKkj2Wt/sU2YiFMEbjGS8DQclZq56LVMjkx8/Ifhulf5ZQj583XCoKS+bLmbT+ujMfVEE5cfvGvQzYapSC0rdh5+osm3mSI+Xzc1VXLuujl+dGVHBaVAN1AC8F7Q++A7MpvSuQYBdK6uJSzhkNFJbMMte8FsmDvIzyxeot8Wm32ZqeMaC2Oex8EFZ+W+fy+I2swQt/LFAhFZbgBgmnjs3+y6oZ2xu37ROY4saZu4bmdU9WxEcVV02TWZl4ZaSha+5dG7f0shb58cIRbR4jmbhx8sbZrX8TUeimnRYE7xS9+F7L4CzjqjJxtHeCba2urhqbS2+UJSjvZqLJ2Hhn8tYmJbKjlYtcHuJCn6/N8RYIDz/ihfBshf89tAJ2qY6uaVNpg3cesaC2MXcFj4oy6xnPMjgfIOWE0Hb0vHhjwSmWGH2M2V1c6TPz/jk9JNxvkCbzprVa9T658+kXyGY4rsurystH34gjN1q4uaNKwhH47ylX/x9A1BWyVDYSjgWn/c70qtJ3+4JgK2qtF068Th0PQvNOzk9FCAYibFNE3xISZSwOlR9h+bSmU/wXU4ra+vLszOwSpD/+1wn1//989OD/zliWQv+VDSGJaos7b0tZZwbmUwEjXQ8o5PU27Uvfh4LH7KwOhIWfim5dMLUmXyI8jqkhF+dmW51XhidO79cZ8O6tQCMDHjSrzA5miy/1y38EumnM+oL8i3rN7hSHsBsEsm7Ra3KNtvvaFo1qbOmtAW/+yXVKXPHXYmOsltbXDRU2uloqJhuYLnaiI2d
Y8g3Ne9FEVTNwv4LFzFTusiJxyXPHBvguo46rObcy/OyFvzRQJgKoSztnY0qGJmardPvDfGfR/tZU61lVcwh+KqaVMxfcaundpaQS2c0EMEtJ7C5GnCWmXk9xfUVicV56I3zrKp1UpshB1/H6XDiFVUER9MUJ4Fm4WuCX16nMpnCpVFgs3roOW6Kv4rj4INsb3UlxUzLwX/w9XPYLCY2N1fNuy29mlQ6akvbpXPgIbC5YMO7OewZp7zMzOo61YDwmnV1vHl2lHBUM6bc7fgGzgKwe2X6TLBUdrZXMxoIc360dH5H2bD/wjhDvinetblx/pUvgmUv+E6U4Lc549SWl00T/L/7zxNE45IbVlcCAiy2DFtKVpPOm6lTYhZ+PC4Zmwzjio9jKq/j8lU10yyz779+jq5BP3/27k0Zc/BTCdlUe4VYutL44JiyaiHZs6dE/Pg3jT+qnpx9iWvWqPmrvlAEfH0Mm2p4/GAvv3/9GhoqMxsNOno16aTFVboWfmgCjj0GWz8AVjuHerxs1voFAVy1tpZgJMZBLYU14GzGHujlxvV1XLMuc0dWnV0rF9jSpER4+mg/VrPgxssa8rL9ZS/45UIJryni58q1tbx2egQpZaJq8veuW43LGlXW/TyCtrPdzSHP+NyDlhN5+KUh+BOhCLG4pDw6DuV1XL22ls5BP4MTIUb8U3ztl6e4rqOOmzdmeYJWNFIjx1U16UyCo9MtfCgNP/6FN7gsepwu5w6YmuAWVw+xuOTNsyNI/wAv9Jhpctm5+x1rs9qcXk06FCsvXQv/2E/VOb79t4nGVI3KthZX4u0rV9ciBLzapS7oT/eUYRcR/vKdKb2aYlE49EP1OIOOhkrKy8yXVCM1KSVPHe3nyjW1VNmtedmHIfhoWSdTfq5eW0v/RIjTQwG+9PhRVlTZ+PQ71qkAa4Yq21R2tlcTisQ50T/HGL8SS8scDYSxEMUWnYDyeq5eq4T49TMjfPXpUwTDMb743uyse4Dy2mYaGJ/9Q43HVFWqowQt/Ne+gVeW8/M1fwEINgbfpsxi4u2T3YhoiON+J1+4YyPOsvQ1DDPRq0k9U87STcs88JDqidS6h85BP1PROFtbk4LvclrZ0qxcX4c84zxxXn03Ky0pnzTxOhoAACAASURBVPfQf8CPPwmnfjFr82qmtPuSEvzOQT/dI5N5c+dAjgRfCHGbEOKkEKJLCHFvmvcvE0K8LoSYEkJ8Lhf7zAWpLh3C/oSYfeHHhzjk8fKF2zdSbrMoX3KaPjoz2buqBpOA77/enXklIdS2SsSHPxoIU41WBu+sZVNzFVV2Cw++1s3Db57no1etYl3D7C6imSivbaHe5OXfftU9PQshqP1wExZ+iRQejZ5FnniCh2LvxFK7Epq2Y+1+kd3t1Rw7eQqAiro23rutKetNmkyCvatrOTAiSrOB2shpOP867PhtEILDnmTANpWr19ay//wYf/7TI/jtzWph6kDz/T9Qj7370+7myjW1HOn18ta5IrwoHn8cxs4t6E+eOqJqNm7dtCIfRwTkQPCFEGbgW8DtwCbgLiHEphmrjQJ/CHx1sfvLJSpoq1na4QCrap00uey82T3G7pXV3LlDOwmj2Vn4jS47v3/DWh7Z5+HFUxmqSaGk5trO7KNjNgmuXFPL2+fHqXGW8ZmbOxa0PVGxAjthzvf288BLKemZqVW2kLTwi7346tf3gzDxr9F3qQK+tTeC5w1uWGkn7FX1Bu+7flfWd0A6996+gYGoCnDKYr/ozeTgwyBMsP3DABzqGafSZmFV7fQRoVetrSUSkxz0ePmtm69WC3XBH+5SFw2A3gNpd/Pxa1fT7HLw+R8eStY9FAPhAPzH78B3b8p4sUrHU8f6uaLNTsPIm3DwP/JyaLmw8PcCXVLKM1LKMPAwcGfqClLKQSnlm0BRDaMcCYSpMmk55VM+hBBcvbYOIeAv37s5+SONBOfM0EnlMzd1sK6hgnt/dEgF7dJhdZaM1TYWCLNW
aIVSNSqPXg+qfe5dG3A5FuhrrFS3qx/cUMbXf9nJqQHN/aULvh60tVWCuay4XTrBcXj7B3jXvJcBaqh2lsGaGyEe5ebyLhpQn2n1qux896msa6jk2m0bAHjp4MmcHnZeicfh4L/DmndAlTKYDvdMsLmlatYEq8tX1WA1C7a3unjfFZeB3Z3si7//ByDMsO4W6DuQNj23wmbh/3xwG2eGA9z3zKk8f7AFMH4ekOr8+Jd3q1qEuRg5je/xL/BXQ3/EQ8O/CQ++B578nPouc0wuBL8FuJDy2qMtuyiEEJ8SQuwTQuwbGprDSs4BYylpmYSV2+Kzt67nX3738mn+RuXSyU7w7VYzf//BbQxMhPjfT55Iv5LVXjIunZFAmI2mc0iTBeovA+A397Tx9Q/v4Lf2tC18gxUquPuZKyqpsFv4/A8PqiC37qvWLXwhtPYKRWzdvvWvEAlwet3vAlBbUQZtV4DFzlrfm3xyh5aRVXlxPtmbdm8E4KEXDjDsn13hXJR0v6xEe8dHAAhH4xzvm2CbVh2bSrnNwj9/7HK+9ZFd6mKgt0mORdVFo+NWWP8u5dbzXpj196CMj7v2tvOdl88UT0M13ZXzwe8pI+mh34RDj8xeLxKC578M/3gVzrcfII4J387fh99+BP7oEJiKMw8/3b3qRVdDSCkfkFLukVLuqa9PP10pV4yk+vCnlOA3ux28Y8OMjJNIaM4q25nsbK/m965bw7+/cZ5XOtNYqCXk0hkNhNlsvoCoW59IS3WUmblzR8ssiy0rKpT4uWJjfOk3NnPQ4+W7r5yd7dKB4m+gdvDfYeU1eOzKrVVTXqYu5iuvRpx5gU0VAZWHXlY+z4bSY9HiGPbwOF/82dGcHXZe6XoGzDa47N2AmqUQjsZn+e91rl9fnyy0crerUYddz6guo7t+B5p3qvcyuHUA/vSOy2issvP5R4vEtaO7pdqugP/2c2i/Cn78e/Dta+CJP1bummM/g3+8El78Cmx8L/+j4UG+4P4q7t/43+oil27+bw7IheB7gFRTrxXI0CyluBgNhHGg+/D9mVeMZu/S0fmft6xnTV05f/KjQ7O7+pXQIPOxQJiN4ryasZoLNAsfXz/v2dbEbZsb+YdnTjE0qHWVTD3Ri7mBWjwOo2egdQ8jWqfMGqdWeLbmRhg6ofy3F2ndA4mOmf/lMjs/P9zHk4f7FnvU+We4E2rXJgwkvcJ2W2t6wZ+Gq01Z8m//AMoblIW/YrNy7fRlFvxKu5Uvf2AbXYN+vv5sZ04+xqIYP6f0oqJB9Yb6rz+Cm/5CtY849Aj85FPwyEfBZIGPPsbY7d/m6fMir9k5OrkQ/DeBDiHEaiFEGfBh4LEcbDfvTPgnsUrNzz6n4E8tyMIHzbXzoW30jAf5xnMzTkKro2T64YcmhmlkGFZsyc0G7S71Y/D1IYTgr9+3BbvFxMuHTiER6n2d8rritfD9/Wo2sbud0UAYs0kk4xlrb1SPnjegchEZF1o849oWM5ubq/jS40cJFHtL4OFOqF2XeHnI46XKbqG9Zv52Cbjb1e/w1C9UwNdsVb+Vho1zWvgAN6yv50O7W/nOS2foSlfjsZSMn1OfRY8BWmxw3Wfhoz+Fe8/B3a/Ah/8d/vursOYGnj0xSFxSGoIvpYwC9wBPAceBR6SUR4UQdwsh7gYQQjQKITzAHwP/jxDCI4SYv8Y8j8TiknAwZWDJ1ByCHwnOWWWbid0ra/jg7la+98pZTg+lbL+E0jJdPi0Y1pgjwRcCatYqYQDqK2189tYN+MeGiJRVqU6ZOsXsw9f9tO6VjE6GqXZaky6uhs3JLKPK7NMxZ2G2gq0Kc2iUv7pzCwMTU3zz+a7FHXc+iYZhrDs5sQw1M3r3yursspT0rpkyDjt/J7m8aUfGwG0q995+Gc4yM196/Fhhe+yMnwf3yvTvmczQuBUuuyOhKS+dGqKuwsaWlvxLYk6iAlLKJ6WU66WUa6WUf6stu19Keb/2vF9K2SqlrJJSurXn
WYyHyh/eYASHTAmEzWnhZ5eHn44/ue0y7BYzf5V6EpaQD79hUhOYXFn4oG7TB48lXn7kinbaHVMMRJwEwyk+2PJaCPvUHVaxoftp3SsZ9YdVho6OyaSyVGBxLh1INFDbvbKa/7Krhe++fIazxTrAe6wbZCwxhL5nPMiZ4QDXdmQZi3Npgt92JdSvTy5v3qEFbjM03dOorbDxx7es5+XOYZ46OscozXwzfl5Z+FkgpeS108Ncu652wam7F8OyrbQdDUwl2ipgdc5v4WeRh5+O+kobn7m5gxdPDfHL44PJ/ZVIWmZ7+AwBSzVU5LAYZMUm5avViq0sZhM76iTDsXK+/UKKBVvMufgJwW9jNGWITgLdrbMYCx+0rqEqg+ne2y/DZjHzV48XaQB3RHNd1imx1tsmXJtFbxxAuYIqGuGqT09frgdu5/Dj6/zXK1dyWWMlf/3EsenGw1IRmlAJCFkK/skBH8P+cFb9g3LBMhb8COV6hk5lYxYW/sUJPsDHrl5FR0MFf/3EMZVFYC0Nl04wHKODbkYr18/bR2hB6HcLg8cTi9z4KKus5f6XznB+RPtuyou4vcJ4t7oIWh2MBKZUSmYq625Rs3mbdy1uP45ki+SGSjufuamD508O8ezxAlqwmRjWBV/58F/tGqauwsb6FRXZ/b2tAj57AjbdOX25Hridx48Pynj4y9/YTM94kPtfPL2Qo88NuiFQncGlMwM9i88Q/DyjLHxN8Cs0wU/n95NSq7S9OJcOgFU7Cc+PTvKdl85oefjF79IZ9U+yQXgIuC/L7YYbtELsgSPJZcEx1rS1YhaCv/655u4pdgtfs+LGJiPTXTqggrWfPQ7tVyxuP87pLZI/dvUq1taX81e68VBMDHeq7Bq7Cyklr3YNc81CXRXp1tUDt1lY+KBaLrxnWxP3v3g6MYdgyRjXYzvZWfivdg2zpr6c5gxD23PNshX8kUA4xcJfoQJF6UQ4FlHvLcLCB3UFv31LI996oYuIya5SPYt8eEOg5yQ2ESFSN7NTxiKpalZVlQMprongGA5XPfe8cx3PHBvgkGc8xcIvwsCtJvgxrX30fLMALhpn7bTPX2ZRxsO5kUl+9PbcPu0lZ6QzEbDNuauiaYey8LP8zfzpHRsxCcE/vrDEQe6Eq2/VvKuGo3F+fXY0e5dXDli2gj+WWnSlFQOldevobYwXKfgAt21pJBSJ441qXROLPDUz2ncIAHNzDgO2oKy41MBtLAJTE+Co5vYt6v+ia9CfyEMvOgs/HlMBRPdKxifDSMlsH36ucNZAJDAt5nNdRz0VNgudA3O4IQtBSkpmzl0VzTuUa28iw/Ccmau7HWxvcy39dzR+HqzlyRYhc3DgwjiT4ViiaeNSsGwFfyQQptaq9dHRc6Wn0uTv6j+0iwzapqLfto1FtHztInfrmAePEpFmHM05KrpKZcVmGDimLLaQNszaUUOTS31Hfd6QugsQ5uLz4U/0QjwK7nbGtPm+NRULT9vNCl04ZrRJbnLZ6fMW0fkzOaqOUbPwX+0aZk1dOS25clU07VCPC2hG1uxyqPNoKRmbkYM/B690DWMScNWa2iU4MMWyFfzRQJi6Mq2IRc+kmNPCX/yJ21ilLhrjYe1rL/LArWPsBF2ymZrKLINuC6Fhk0q5HD+f9FE7qnGUmXE7rfSOB1V6o7Om+Cz8xG17++wq21zjTN8mutFlp3+pxWwuhpMZOrqrIqeByMYtWQduE3/istM/EUo/XS1fjJ/POmD7atcwW1vduJz5GXaSjmUt+DW6ha+X+6dLzcyhhd/osiMEDE1pxUVFbuFX+05yUq6kypHd4I4FoWfqDByd1UenyeVIipmzrvh8+HpgrnoVowFN8PPpw4dZk6+aXQ56i0nw9ZTM2nUJV0VOBd/qUM37sgzcAjS5HcTicmkbz2WZg+8LRThwYZxr1y2ddQ/LXPCrLWHlb7NpFW5pLXztR5UDC99qNlFfYWMopFv4RSz4k6NUhoc4Z12Tn4KQ
Bi3zZ/Bo0l3hVILf7LInxay8CPvpjJ8HBLhaGdVcOrPSMnOFPgFsxkWvyW1n2D+VHAJeaIZPgckK7pX5c1U0Lyxw2+xSRlrv+BL9zoJjMOXNXGWbwq/PjBKLyyVLx9RZ1oJfZZ5Sub96N8O5BD8HFj4o3+vAZEqf/WKl/7B6sC+8l3tW2CqhelVaC78x1T/tLMKOmePnlRvQYmNUc+m483VbnsGl0+SyIyUMTBSJlT/cpVoBmy35c1U071TnQoZWyTNp1AR/yfz4Ka6++Xilaxi71cSu9vx0xczEshR8KaUafiKmlNiXaT7qtC6d3GXpgHJX9CYEv4h9+FrK5Fjlhvzto0EL3CYEX1mzzW4H45MRVSlZjBa+HphDBf8rbRZsFvM8f3SRJIK203u9TwtuFwNaSmZeXRVtWk3DudeyWr15qb+jsexz8F/tGubyVTXYrXk6bzKwLAV/MhwjHI2rwquycmXlwzwunRwJvttOj76bYrbwB44wKtyYXfmbr8mKzTDSpbJehCnhWmtKWGZB5cMPjavUzQJzYXRSNcFL8dOOBsLU5MudA1oDNdcsC7/ZnfIdFZpYFEbPQl1Hfl0VKzarbqrnXs1qdbfTit1qom+pXDpZVtkOTIToHPQvaf69Th6iccWPHmhzEoSySvUP5rbwF1Fpm0qTy85LEQvYSGYAFSP9hzkp2/OXfQKqp46MwYVfqxRMbcKPfive7w2xJlF8Nbq4VsM54M9+eoSe4QmeDfYkftRjk+HZVba5xlmdJkuniCz88XMQj0BtB/vOjWE1i/y4KkxmaL8aurMTfCEETS4HfUvl9ho/r4wW++zpXqns61Z3a1cuYTqmzrK08Ec0wbfHg8rCN1uUBR9Ok4efawvf5WAKTSCK1cKPRZFDJzgUbctf9gkkM3V63p5WqKLfivd6Qxnz0AvBib4JpsYuqIuU7tLx57HKVmdGtS2oea6VdsvSWa9zkUjJ7KBr0Meauor8uSpWXQOjp8HXn9XqTS77Elr451TAdp4kh85BH0LA+hWVS3NcKSxLwR/TBL8sHky6c8oq0lv4iaBt7iz8oCxywfcPIGJhzsnG/GWfgAryWexKQFMmXSWCbePBjEHLpcY7GWHQN0Wb0OYsp7p0lkTwZ1/wVPFVEVj4w9rMhNp1dA766ci2WdrFsPIa9dj9SlarNy1l8VWWKZmdg37aa5w4ypbWfw/LVPB1C98SnUxm6JSVQzhNn3E9D/8iBqCko8ntIIi2rWIN2vqV9TQo3fl1V5jMicHoifRD1LSwmvIydSueSEssrIXfNaTu/loTgr8SKSWjk0sg+I6aDIJfgErSdIx0grOWoMXF+dFJOhryaLk2blMu2CwDt00uO4O+KaKxPKevSpm14HcN+OloyONFcQ6WpeCPBlQhhjkaSGbo2CrzXmkL0FBpIyx0C78Ifqzp8KnWu4PSnX93hT4rd8bQ5sSteJG4dPSeLFfXBohJQbSiiYAW/F8aC3/2HU6zu0jaKwx3QW0Hp4f8SEl+LXyzRXUgzTJw2+S2E4tLhvJdfDU5qvRjnoBtNBbnzLCfdfm8KM5BTgRfCHGbEOKkEKJLCHFvmveFEOL/au8fEkIsskn44hgNRCgzC2XRl6W6dDL10hE5s/CtZhO1lQ6iwloSFn5eM1BgDsHXrNcisfBPDfhxWM1c7vbRTw0H+yYTOfj5F/zZDdQAGqscDPvDTEUL3CZZS8nsGlQXxbxbryuvUUPis0jXTcSDxvNsXGXZFrl7ZJJITJauhS+EMAPfAm4HNgF3CSFm9tO9HejQ/n0K+PZi97sYRgNTNDolQsaSLh1bRWYL32LP6QAQFbi1FbEPX03mGsaV3ywdSPbGn9FdMOGfLnOqu6sC+/A7B32sa6igMT6AR9bz0qlhRrQ7xSURfJjdQE1LzRzwFnAEZHAcAkNQ10HnoA+LSbCytjy/+1x1rXrMwspPzfjKK1kKvj5gPa93QXOQCwt/L9AlpTwjpQwDDwMzRtZw
J/B9qfgV4BZCLHL228UzGgjT7NTKs23arVXGoO1UzqpsdZrddoKUFa+F7+tn0uImioXqfItZ4zYl6NWrpy1uctvxBiNMhqNK8GYUHi0JfQdh378Aql1zR0MFlgkPAUcLL3cOJTtlLoVLB2bn4ieymQpoOIxo/eZrO+gc8LOqrpwyS549xc071ZjQmX78vkNw7vXpqybSV/P8HWVZZau7BtfWF0bwc5GH3wKk1jp7gJljftKt0wL0zdyYEOJTqLsA2tuzmxqzUEYCYVY5ouAlJWibwcKPBHPmv9dprHIQiJdRGw2R/7HFF4F/gAlLLVV2C1Zznn+85bXwP4/Mcumk3oqvyxC0zCvBMXjow+DrJWhx0ee1sb7eBsd7cbTezoHT45wdVhfs2vI8tUbWydBAbcms17nQx1TWradr0MOGxiXwTZut0LZ3ej7++Hn4/m+o3+ofH0vckVc5LDjLzEvg0jmv8u/trjlX6xz00+J2UG4rTAlULn7N6TRrZnejbNZRC6V8QEq5R0q5p74+y2n3C2QsEKbBprVGnubSSZOlEw3lzH+v0+y2MymtRENp9lcM+PoZM9Xk33LVKa9TGTspTBMzZ/XSB22f/F/gH4CatVie+jxufGwp9wGSFSs3EJfwxKFeAKrL89zeNpOFr7l0Cmrh97wFNhdTrpV0jwSWzje98ho1IjM4pu7CH/mYeu7rVcNpNIQQWpvkPH9HY+eyaouc97TVeciF4HuAtpTXrUDvRayzZIxME/yUoG26ubaRYM5y8HWaXA5C2JgKFtnEIh3/IEO4l07w0zDNXZEhDz1vHP0pHH4Ebvhf8JsPYg6N8+fWH7CuTAlu+5rLqLBZ2H9+nDKziYp8W2sZOmY6yyy4HNbCWviefdCyk7MjQeIS1i1VMdHKawCpXDhP/Sn0vg3XfU69d+HX01Ztdjnyb+GPnZ3XnROLS04PFS4lE3Ij+G8CHUKI1UKIMuDDwGMz1nkM+KiWrXMl4JVSznLnLAXhaBxfKEptmdYLP5GWWaHNtZ3hVw+Oz3ubtlAateKr6FQR+vClBP8AfbEqavLtqpiDFS61775xLVNnqYK2vgF44n+qCUvXfRYat/Jq08f4gPkVVpz8/wCw1K7i6rXK6q4pL8tP++hUnJkzlZpc9vyLWSbCAdXeuvVyTmm+6fVLZb227AazDZ77G3jzu3D1H8A77lW+/QtvTFs179PB/EMqltE8d/LhhdFJwtE4HQWosNVZtOBLKaPAPcBTwHHgESnlUSHE3UKIu7XVngTOAF3Ad4BPL3a/F8u4FmirtmiCn1ppC7MDt/5+qGzM6TE0u+2EKCNWjII/OQrxCD3RKmry7aqYA5vFTF1FmboVd9aoBmrxJSieefwzSsj+ywPKVww8aPkAZ00rESeeUFOXKpu5br1yN+Y9qA3JBmpp3FoFHXXYe0AZSS176BrwYRKwui7PGTo6Vju0Xq4uOO1Xw01/qb6nlt3gmS34g74pIvkqvup+WT2uvn7O1TqXKm11DnISkZNSPimlXC+lXCul/Ftt2f1Syvu151JK+T+097dKKfflYr8Xg15l6zZrqWwJH7521Z0ZuPX1J0cg5oj6ChshyoozLdOviq66pyoLauGD1kpat/BlXIl+Pjn1n3DqF3DzF6E+2Rb6xNAUP2r9guro6WoBs4XrO1RTt7wXpulUNKiuojNoTJ0OttT0aD/j1j10DvpZVVuevzbR6dj4XnC1w4f+RRVkgQrm9h2aFo9rcjuQEgZ9eUpf7X5ZVf/qc3cz0KmlZK4rdcEvJfSBES7zDJdOuiEoUz71uiK3XRotZhNYnYhi7JapFV31xdzU5bvoah4S1muGnvA5Z/CYetz93xKLJsNRPGNBbCv3wG1/B3s/BcDK2nLWr6hgZa0zv8ekU7MGRs/MWtzssjMSCBOKFKD4yrNPNQsrr6Nz0L/0Qnbl3fCZg9PvwNuuUL2ZUoadN6X2ZsoHZ1+GlVcnLzoZ6Brw0+SyU2kv3J3z
shP8UwPqKttgTxO0hekuHa3FQK4tfACTzYk5VoStFbTPPISrIN38UkkUXy1VA7XAiDoPypIifnpQWYodKyrgik8pX7HGD+++mj9/z8wawzxRuw5GTs9yazW5VXC7IFZ+z1vQuodwNE73cKAw2SemGRLWerl6TAnc5nVYzESfqjRefd28qxbkojiDZSf4J/p9NFTacMqg8sfqKZfpXDqatZuPPuxWmxNrvAgFP9FWoZrLliKneg6a3A58oSiTFm3mcL4zdQJDKkU0Bd1ASNf7xOWwLt3Eoto1qurbNz3XoWmpx/jpTPTBRA+07OHcSIBoXOa3aVq2OGugbv20wG1TPofF6P77VXMLfjwuteK9wn5Hy07wT/b7uKypKtlHR8+wSFj4Kf109J7bebDwbY4KbHIKme9A5ELxDzJlcmBzVlJfWWgfvvqhDkY0izvfufiBISifXvvROejHahZL57rJRO069Th6etriadPBlpIZ/nsorG96Gm17lYWvpVhX2a1U2Cz5yWY6+5IquGrcOudqPeNBgpFYQXPwYZkJfjQWp3PQryzXKX8yQwfSjznUBT/HPnwAu7MCs5B4/UVWfOXrZ1hUc1ljVf7TDedBvxXvCWtim3cLf1iNVExBH+iR94rj+ajRhsnrrQw0Cjbb1rMPTFZo3MapATXQo1DtAmbRdoWK96R8V40ue37cXt0vq94+prnv9PSAbSEzdGCZCX73SIBwNM6GFVor5LKUFLK0Pvw+Vaqd4zx8AGe5urXrH8lz5skCkb5++qJVS1MiPw+69eqZtCj3W759+JPDs1w6nYN+1hXYKgOgqkU18RuZbuE7ysy4ndYCWPhvQeMWsNrpHPTTVl2YgR5p0YedT/Pj5yF9dfw8jHXP686BZA8dw6WzhJzoV1fZy5oqp7dGhuTz1PYK/gHlv8+DpVtRofY3NJaSeRIcy5sVe6THyw9e7543Fzk60U9/3F1w/z3Aiio7QkCvd0proJZHC19KZGCIt0YseCfVwPRQJKYN9CgCwTeZVKbODMEHrZX0EhVfRWJx7n/+FPGet6FlD6CyT5as4CobajuUmyVF8JtdDjUyM5ec1fPvZwv+w2+c583u5PnaOeinodKGy1m4DB1YboLf58NsEsrXONPCTzfXNg85+DpVlequYWTMm1z449+HR/9bhr9YHF9/tpM//9lR3v+PryYCkekQ/gEGpVvFOQpMmcVEXYVN3Yrnu4FaaBwRj/Lk6Qi3fu1FXjw1lBzoUQzBSNBSM9MJ/tKNOnyla5ifPP0spkiAV0IriRR4oEdaTCbNj58M3Da67Az7pwhHcxgz635ZuQDrN05bPOSb4t4fH+Y3/+l1/uaJY4QisYL30NFZXoLf72NNnVYcEvZPt/BhdotkX39e/PcAlVXqBzLu1Vw6UqoKwaFTednf0R4vG5uq6B0P8Z5vvMJ3XjpDLD6jb1A4gCUaYBB30VhszS671k8nzy2SA8pdFLbXUmW38rHvvcEXfnwYKFzv8lnUroPRsxCfnnO/lNW2R3u87DCpi86f73PwwW+/VtCBHhlp26uGpGjnTLPbjpTJOpxFI6Wy8FddOys19GivMuKuXlvLd185y3u+8Qqn+n1FYTgsM8GfSPqmZwZtYfYQlDxa+Gbt7mJ8QrO2J3rUyenvh1g0p/saDYTp9YZ4/85mnvqj67lhfT1/++Rx/uwnh6evqAWppbMBZ1lh2rfOJDH5KsOYv5wRULNqVzS28vgfXMvvXbeawz1eLCbBqnwP9MiW2rUQjyR7r2s0ux2MTUYIhvNffHWkZ4LrHN1Iu5uP/8bNiR46RXNR1NH9+B6VTZTz4PboGZjwpHXnHO2dAOAfP7KbBz++F18oQjASK4ospmUj+P4pVTGZ8E2HA9NdOqDKo3ULf8qv3Dt5yMEHlPsI8PnUyUG/Jr4yPivXerHoFseWZhf1lTYe+J3dfGh3Kz890DNdJLRJV47alpzufzEksisc1Xl16fhH1Xe+orkNu9XM
n717E4/efRXfuGtn/gd6ZEuG1MzGKjsVTNKfK+t1Do72edlpOo1o2c3vXLWKX3zmOv7mfVvY2pL7Ed8HgAAAIABJREFUxIZF0bxLBfo1P37O01cT/XNumPXW0V4v7TVOXA4rN6yv5+k/uoG/eM8m3rez8L+rIjmT889JPWDbqPmmZwZtQV0AdAvfn78qW0B19QMCfs3C70+xtid6crqrIz3qorK5Wf0ohRDcuaOFUCTOy51DifXCXtWrpWZFfgbPXAzNbjv+qShTZW4VtJ3ZvjpH9PeqHuorW5OffffKGm7fWrDBbLNJpGZOF/zVope3bb9P4MTzed29dzLCyOgYTeFuaFUB21V15fzXK1cWPIV3FrYKaNoGZ14EkhXJObPwu56FisbkRTiFIz0TbGlJxsBcTisfv3Z1/ttoZ8GyEfwT/Ur0NjRWqvL0SBrBT3Xp5DEHH0j02A/4fSqQ1H8IzFrvmpQBDrngaK+X1mrHtAyBK9bUUGm38PSxgcSy4V7lKmhuXZXT/S+G9hp1YRyNV0AsnH5ITQ4YG1YXu47V8w+xKBgVDeqcnSH4a0ZfokzEmOw9mtfdH+3zst10GhPxRIZOUbP+NvC8Cf4hKmwWasrL6B7Owfkz3AknnoBtH5qVwecNRjg/OpkwroqNZSP4J/t9VNgstFY7lNhDGpdOStDWn78qWyAxJ9cqp+ga9CsLX2+vmmML/2jvBFtmnIBWs4mbLmvg2eMDRLVUTe/QBSLSzJr21pzufzHod2QXprQhNHlKzZwc62eCCqqrCu9nzYgQyo8/o/jK1avcC/6R/I6YONozwW6hJRW0XZ7XfeWEDXcAEjqfAuCyxkqO900sfrsv/p2qz7nmj2a9daxXv5sufJZbOpaN4J/o97GhsVLdeupW4lxBW1/++ugACZeOQ4TpPN+jCjjar1J9z725E3xfKMLZ4UDaE/CWTY2MTUZ465zKZJga72cYFyvrCp9NoNNe46S8zMxpv3b3k6fAbdw/yKTVnZdt55SatdN9+OFJhDa4OzIxkOGPcsPRXi/XlHWpNMQZM4iLksatUNUKJ38BwMamKk70+xIGzkUxeAIOP6oa6c0o0oNkvMyw8AuIlJITfTMydCCNDz8laOvrV4FVe55EQHPpVJgijJ3VWrk2blP91nNo4R/vUzGCLWmCajdsqKfMbOIZza1j8g/gs9ZhNhWPP9ZkEmxorOTYuOb/zEPgNjAVxRYeI+aY/QMuOmrXqSydqNbe+9xrEJsijsA8OUR8ZqptDjnaM852cQrar8jbPnKKELDhdjj9HESCbGyqYioap3tkEW6dF7+iPANX/2Hat4/2TrCiylbwPlSZWBaC3z8RYiIUZWPjjI6YmXz4UiZz8PMVjNIs/NZKkgHbxq1Q1ZxTH37S4pht4VfYLFyzrpanjw0gpcQ+NUTEkZ/B8YthY1MVB0e1UzUPufjH+yaoZQJrZUPOt51zateqTK6xbvX69LNgtjFSvZ0aOcb50fxMUZsMRzGPnMQZDyRTHkuBDbersaVnX2Jjk/r9H+vLXHg4JwNH4ehP4Mr/npzRMIOjvd6ite5hmQj+Ce0/eEMiQ0cX/Jk+/HJAKpePP385+IAaxybMtJSD23sc6axTgxyqcmvhH+mZoL7SRkOVPe37t2xq5PzoJK+dHqFajmGuyu04x1ywsakKTyh/DdSO9HipFRNU1BZRRk4m9Ewd3a1z+jlYeTWWmpXU4c2NjzoNx/t87Er470tI8Fddq+7cT/ycdQ0VWEzi4r+jF74Mtiq46n+kfTsYjtE16GdLkfrvYZGCL4SoEUI8I4To1B7TOvaEEN8TQgwKIY4sZn8Xi95DZ0NqDj6kr7TV3/f1589/r2N1ssIRZ238LOG6zepuwtWqioCiuRnHpiyOzCfgzZsaEAK+9ewJavHhrCl8rvBMNjZV4UW7OM/04R/7GbzxnUVt/1jPGNXCj8NdIhY+qMCt16OqSdfdRGVdC3ViIjsxO/YY/PJLC9rt
sV4vu00niTnrVIuHUsFig3U3wan/xKa1Vbkowe87CMcfV2KfIX5xon+CuIRNl7CFfy/wrJSyA3hWe52OfwVuW+S+LpqT/RM0u+y4HFpaom7hzwraprh8fAP5tfABrA4aysKsFx4GnB1qWZUmuDmw8vUeHjMzdFJpqLSzs83N6bNnMQlJTWPbovebay5rrCQuzITMlbOzdF79Orz2fxe1/fM9vZiJI8pLQPCdNUpwRk4r6x5g7U1YKlfgFFOc7hmcfxsHHoJX/gHGL2S92yM9E+w1d2JqvzJ/bs58seEOVVfTu5+NTVUXJ/j7/025Ya/87xlXOaJl6KTm4BcbixX8O4EHtecPAu9Lt5KU8iUgz83MM3NCH3qiM5XJpaNdAPyDMOXNXw6+jtVBzcQJbCLKCVapZS5N8HOQqXNqwEcsLudNEbtlUyMNQvX0qagtnpRMnXKbhZU1TiZE5XSXTiwC/UcSfXAuhqloDO+w9l2nybooSmrXKZfO6eeUUdKwUeXoA0N95+f5Y2D8nHo88qOsd9nT000bA4j2Ky/miAtLxy2q6vbkk2xsqmRgYorRQHhh2xg9A3Udc7ZKP9brxeWw0qIVeRUjixX8FVLKPgDtcdEmkhDiU0KIfUKIfUNDQ/P/wTxEYnFOD/mn93fP5NLRLX49z3kJLHzzkCqWeSOoCX2VJrg5sPD1Ctt0GTqp3Lp5BfWa4Of9IneRbGyqYiheMd3CHzwOsSlVVxG5uJL5U/1+3FLrWFoqgl+zVhX/nHkB1r5TWdya4Md8A3iDkcx/K2Uy4Hv40ax2F47GcQ+9pV60laDgO2tUyvPJX7BRM/wWbOWPdUP1qjlX0Stsi67qOIV5BV8I8UshxJE0/+7MxwFJKR+QUu6RUu6pr198xsghj5dITE53a2QM2s4U/Pxb+Mg4YVHGiyPa8VU1q8ccZOoc7fVSZdeKzeZgbX0F72jWcpPz/Zkvko1NVQxEnMRSrfm+A8nnF5mff7TXSw1a1kZ58WUopaV2neq3FBxTgg+guaPqhJcTc4lZYEhlrdR2wMBhlVc+D52DPnZwkpjJBk3bc/EJlp4Nt8PgUTY7VJbXggQ/Hld3RXMIfiQW52S/r6gzdCALwZdS3iyl3JLm38+AASFEE4D2mIUDcWl5uXMIIeCadbXJhWE/mG0qUyaVWYKfbwtfaxtQvo7TIyFCkRiUOVXv91xY+L0TbG52ZWVxfHSLlsVTpH7sjU1VjFFB1D+cXNi7P/k8MDz7j7LgSK+XFqtmAJSM4OtBUwFrblRPNQu/XsyTqTOmuXOu+UMQJjgyv5V/tGeC3aZOwit2gKVsEQdeQDbcDkDN2cepr7RxbCGC7+tTbT3mEPzOAT/hWLxoK2x1FuvSeQz4mPb8Y8DPFrm9nPPSqSG2tbpxO1NO1HBgdsAWZrt0lsCHDxCp30xcJhu84WpZtA8/Gotzom8i+xPQ369aEBfpD3pjUyXjshJTah5+7wGw6tk7Fyf4R3snWF8ZUuJXCtWjkEzNbN4B5Zoh46xDImgv8yeK7dKiu3Na96pOj4d/OG9DupOeQbaIs9jXXL34Yy8UtWth3S3w4v/h1trhub+jmejf2RyCX+wVtjqLFfyvALcIITqBW7TXCCGahRBP6isJIf4deB3YIITwCCE+scj9ZoV3MsKpC/38s/+eRNc8QAVtZ7pzIGnhj55RdwD5FgCtRbKzfSeQcptZ1bpoC//0UICpaHxe/30C/2DR+u8BWtwOJi0urLFJVWUaDatCmDVae9qLCNzG4pLjfROssgfVXdU8g6iLhtq1qtFex63JZWYLwlnLWmeA4/1zWfjd6tHdDls/qF73vDXn7sLn92EVsdIM2Kbyvm+D3c0fj/8tfYOD2U+/ykrwJ3CWmVldVySzEzKwKMGXUo5IKW+SUnZoj6Pa8l4p5R0p690lpWySUlqllK1Syn9e7IFnw6unh1mLh7rgGXj7+8k3wn5VjDETXfBj4bzNsp2G5tKpXrOb/7+9
M49u677u/OcCXECAFCmuIimJ1L5YshbL+xJZkve0jieNt7h1ejLjdMbTOI5nWnt8ZnLcxmnak7TNjNOcunbsxE7s1nESL3HtOJZdO95kSZZlWbtEaKPETVzBnfzNH7/3ABAEQEikhIeH3+ccHACPD8DFI/DFffd3l0CeNyL4xbWTjuEnq7CNyxmc7jUViAi+aVbIpe8ktFgLtvPX622nEcM/2NJD/9Ao1TndmRPOAZ0+fNd/wGX3jt1eWEltbg97kvWL6Qjq/3OeH5b8gXZskizejowqStu26juzLpga+9NFYQV88TFK+4/yLc/jHGhO0ctvD+ozwOLEKcufNeqJck5qSxIPV1favr23hUV51qn+vtd1Gh+Mn2dr483RXfDgzMfvwQrpCJ4Z57C4elrkNHNaLfR3TKoV8N6mHvK8ntQ9jp5mXenrYIrL9A/SaE+rDucAzFmrU+5OI6RjT2sqUZ2Zk6FjU7U0HBIME6igQjqT94tpj1p89BXrlMUdz48bm2jT2NHHuWoPnYE5CdsJZBRzLqft/G9yk/ddej98MrXHtAd1QWTsml8Ue5tiMgEdimsFXynFO/tauaTMnmDVqRtNQfxpVzb29rPh7S6+QVfu5RexpLqIXSe6UErpDxdMKo4fbA0xq7SAHG+K/+LeNh3DdzAVlfoHqanpuF6wzS/W4Q1/2Wkt2tqiWDDUnnmCH4/CKoqG9RpHwn4x7THZJsu/BKFmaHg77u7B1m7O8+xloDoD2iGnSMnV9/Pe6DLO3f5tnd46EROkZHb0DtLZN8Rch4dzwMWCf6AlxLGOPpYXnNSx+BxfuE1qwkVbiGw/Gx7+wmvgmocBnYXS3a/HMEaqbU8/rBNsC6U+i3XYymV3+KJlbY3+ITx+4phOyaxZocNugfLTCukEW0NUFOXj6W3NrJBOIgorye1vJdeboF/M8KD+TJVEDXlZeI0Ob376XNynHNjzO0okRN7CdWfI6LNPTm4uj5beR64ahL2vTvyACQS/wRqqUueU2cdJcK3gv71XF23VqhNQvgjmroU9v9EZCQM944uubOzY/lnORx9TEJKo2nY0tUUmpRSH2nqpT9Xj6LOKrgqc3Q9+9kwt+O1NR/SCbfVK/YdJePjzS/N0+MwNgh+oQIZ6OafcG1/wO4/oTpvR4pVboBdvP/2FbicSw+z9T9Oiiile/Z/OnN1poKJ2Hh0UoVr3J99xMKTPgJIIvn2mOKfcP4UWnhncK/j7WphbHsDXcwRK5+g83I7D0LzTiuFP4OEXnt149uIZRYhY/euLagAZm6kz0A0/WAGvPThhGl1T1wB9QyPUl6X4AbRTHR3u4duLtsVNm/TCek2U4J+Oh9/Wyzkl1rqOK0I6Ohf/gvKh+IJvt1SIFa9L/hxGh+DDH43dfvIg8zvf5zXfdUiOM/u7ny5LqqdxYHQGQy0ThHTaExyzKIKtvYjArFIj+Gmhf2iEDw62sW7+NOhqhOlzYKEuvGDPKxPE8O2QztkVfH9eDvVlAT452qFz4Qsrx2bq7HgeOg/D+4/A299L+ly2x5G6h58Zgk+ujwHxsbDXSiOs0emsOqRzah5+z8AwLd0DzC+0upL63SP4y4oHaeoa4ETswO5wemHM3N6yebD0RvjocejvjGzf9BijePh0hru8e9CC36CqGZ3Iw08hJTPYFqKmuID8HOen9bpS8Lccaqd/aJT11f2A0v+soio9eHnnCzqdbyIPPw0ZK1cvrWLj7mb+5e2D4/vib/2pHi137q3w5rdhy5MJn8ce1JxyDD9TBB8Y9U1nGiF6PYWMFNfrjf5y/R5GhlN+HvsYzS2wBoa4IqSjBX9NxRB5OR7uemozXf1RfXXagzp/P9761KXfgIEu2PxjfX+gB/XxU7w6egHTZzh4sPtpcu7MYprzZuLra6LhWJLRkOGzojkJdwm2hhyff2/jSsF/e28LuV5hVaEVmy61/lmLrotMl0q0aGt7/mdj0TaG/3nNIm5YXs3Dr+zi0PB0
fXYCuiPksS1w3p1w4yO6YvDle3V/7jgE23pZm7ODmS/eHElFTUa/HcN3vuAXFGth3jpUx4O/3qGzmuxwzCkMOD/UpoW+JtdKX3SD4Fsefo23ix99eTU7G7v40yc+IjRg/RC2H9IFV/EKzGpW6r487/8TDPXD9n9FBrr48dA1GRGbPlUC+TnctOEKAL715EscSTQprD2o1/WSfDeCbb3UZ8gxcqXg/8feFtbUleLrtlrF2qdji66P7JQopFNQqgui0iB+OV4P/3DLSjYsqWRjYw5D7Va/8o+f0p7ZubfoXOCbfwK158Evvhq3+VWwNcRV/r1I8J3U0s4yyMO3c8HzZq3m2Y+O8NBLO1EFVn74KcTx7bBXhcdunObslNSU8JcDAqEW1i+p4v/etoqPD7fzX366Wfdpag+OzdCJ5bJ79QLltp/Bpn+he/o5bFULMiL75HSonrMcgKqho9z+2AfjQ2AQydBJUITZHtIpmSmfTacZ1wl+/9AIIsIVCyv0Pys3EPHeKpdExD+R4F98N9zxy7QNecjL8fDI7avxlc0mdzjEbzdth0+e1VWRduFLXgBu+mcdmjq6adxzBNtCzMqzPNemFIaM9bXr4qV8Zzd+AvQPMnD+Jev5z5fN4cn3gvzrTqs18ilk6gRbQ1QW5ZM/0AaenDM3rP5s4s3RC9g9uofh9cur+f7NK3j/YBv/9ektqAk6PlJ/uXYkXv8WtOxie+0tgGRMuOKUsSZ3fXO1h/bQELc/9gGtPTGT5tqD49c8ogivlxnBTw++XC//fs/l/Nnn5kJ7gw7n2OItEvHy47VWAB27r7v47BibAF+uly9eqeeG7n/p+zrksvpPxu5kh5xiRE4pRbAtRFWOJfh2CCsZfe06JdPBfbzDWD96UrOSB29Ywu0XzuaJbZaXfgoLt+E6hVCLdggy4b2nQmGlfk8WN62aycNfWM6WPUGkrx2VTPBFtJc/2A3+Mt7KvYKCXC+VRe7K0AmT54dpM6kePsYTf3o+jR19fPXJqBCYPTsghZRME9JJMyIS/5+17Ivamy2ZnQ6zUiavVNv3Fe9rHFGVfJYf04c8z6/PXmIEv6lrgP6hUaZjpeU1fTbxi/W1Z0Y4B/Qgi/rLYXo9IsJf37iMFQvnA/Dp3gMpP01DqxV3DbW6IyXTJlChx/lFcfuFs3ngYt2G4dfBnOSPX3SDrlm5/D4OtA9TV+Z39ECPSVM2D9r2c359KY/ctppPj3Vy98+3MjQyqo/jcP8ERVe9eDIkJRNcLPiMjsYX/Jlr4IGjULk4HValjlV85aeP3+Rs4CtPbhm/sBQoH+PNQaTqr3DEWohNNaSTKYK//I/gKy+HPXKvR/ir2y4HYOPW3bx3YGIvv2dgmNaeAZ222tvqjpRMm8KqcEgnmlsX6KK9x3Yonny3IfHjPR74kxfg4rsJtmVO9slpUzYf2vaBUmxYWsV3blrOW3tauP/5T1EnreOUJEPnUFuImpLMSMkEmODnPoPpOZH41zkvA36NC2foDn3A1bffyz891cCdT2zijgvraOzoo7Gzj3tDBdR2NBH9bg5Zp5i+gZPgydVeSk+L7hSYiL72jBY9n8+Hyi+mTvXytZ9u4Z4NC2gLDdLY0UdTVz9fu2IeVy6ODHYZk7b6SUvSL3TGERPSsRGrgGj+onN46OWdtPcOodDN0Ro7+li/pIqvXhY5DsMjoxw52cvVS53dUG/SlM3XtQe9JyFQxq0XzKapa4B/+N1eLuj6jFuA728eYPs7m6idXsB3blo+5uHB1lNoYeIA3Ovh2wUTpRn6ZfbmWAVj1zJ33kIe/4qOMf7Vyzt5+sND7DnRzeEBP91tx8c8rKEtRMA7imewC2bpdYAJvfxM8vATIIFyrqnPociXw7d/s4vH3jnIx4c72HGsix/HeLR2SqaO4bukj45NoEKPMBzoGbu94xD4SvjbL1/O+XWl/OCNfTyycR/v7m9lb1MPj2zcx8hopIL7eGc/QyPKlSmZYyizhsm0
RQqwvr5+Pl++cDaNDbsZVcJz+4T9zT38/MPD486yMyklE9zs4adwOuZ4/vhX4NOZM+fXl/Lh/9rAyKhiuj8XEeHNv/sh3r7gmIcEW0OcUzIIIfRwkEO/14I/78rEr+MCwSdQTsFQOxv/x1o6eoeoKMrH6xG+88ounnw3SM/AMIX5+uMeXmgrFt1mww0pmTZWLj49TWNrTazwpi/Xy7N3XcTxrn4qi/LJ9Xp48ZNGvv7Mx2w70sF5dfpz0HCqxXuZSple/+HkAZitHSQR4eGbltM76EGO1fDBN68n2Bpi7ffeYuPuZu68pB7IvJRMcLWH3zDh0ALHM71ujBAXF+RSGsgLL6JNr6hl2mgnx9ojXsehtl6WFlvFVhWLdWgo2cLt6Ig+pc10wfeXQe9JfLleZhT7woMorlxUyeDIKL/fF4ntN1gpmf4ha53DTR6+LfixYZ2o9SyPR6gtKSDXap39uQUVeD3Cxt2Rxd5Tbs+RqZTM1mm5beNbLPhDRxHLYawvDzC3PMAbuyPrIw0ZlpIJrhb8oO4r79AZrVNB7czZ5MkI7+zQ2Smjozolc36hVUASKIeqc3SlbiLs3iluEPw4efhr6qdT5Mvhzagv6qG2kBYyWxTdJPj2EProhdvRUd04MEE+ebE/l/PqprNxd+RHItjaiz/PxSmZNt5c/UMYR/Bjkz7WLa7kgwNt4bTNQxn4ozgpwReRUhF5XUT2WdfjVENEZonImyKyS0Q+E5F7JvOaKXOyIXmRiQsor6wBYNsuXU3b1N1P/9AodT7L4/eXw4xl0LI7cYuFTKqyTYbdEz+mk2iu18PnFlawcU8zo1aMuqG1lzllAei21j8KK2OfLXOJDunYdB/X3UWTfB/WL65k1/EuGjt0EVuwLURdWcDdKZk2ZfOhLSald6hPH7dowV+izxbf3a8di0hKZszkMQczWQ//fuANpdQC4A3rfizDwH1KqSXARcDdIrJ0kq87Me3BzI7fp4BYnunhw4foGxwh2KqFvjbcH6Ycqpbr1rete+M/SV/m9NFJir9cv8/obo8W6xZX0tI9wI7GznBKZl25P7LOY1VcuoKo9gphUuj4uH6J/qHYaJ0J6cK0zFmMnBS24EfPm+iIacuCXkcrys8JH6NMS8mEyQv+jcBPrNs/Ab4Qu4NS6rhSaqt1uxvYBdRO8nWTM9Ct86szNUMnVSzBLxrt5L0DreG4a5mnWxeX+Up0SAcSh3Xc4uHb4xnj9NNZu6gSES1mdkrmnLKAXqgrmJ757z2amPYKQKTjY5I+OvMqCplVWsCbu5vDKZmZFKqYFGXzYLgPuhsj2+L8SOZ6PVyxsIKNu5t1RXsGdcm0mazgVymljoMWdiDpubGI1AOrgA+T7HOXiGwWkc0tLePziVMinKFTf3qPzxSsCtHqnO6wmOV5PRSNdOovvccD5Qt047VEqZluEXy7WjaO4JcG8lg1q0Qfo+i468mD7vLubaJz8UdHdGttX3HSBAYRYf3iKt490MrB1pBOycygxchJYWfqRMfxD74FyLjPx5WLK2nuHuCzxi4aMiwHH1IQfBH5nYjsiHO58VReSEQKgeeBbyil4ozj0SilHlVKrVFKramoOM3FtPCvs8s9fKtYalXZMBt3N9NgDS7XM1otAfTm6mwdtwu+7eEnaKC2fkkV2492sjmo329dmd+9gh/dXuH9H8Lh9+G6v5swgeHKxZX0D43yzCYdzqjLppAORAS/dR9sehRW3TEuZXftogpE4PmtR+nqH864YzSh4CulNiillsW5vAA0iUg1gHU9vqZb/y0XLfY/U0r9cirfQFzas8TDz8kDXzFLiwc53tnPu/tb9SlmqDUigABVyxKnZtqC7ys+8/aeScIefnzBv3KRPvl8bvMRqqbl4/eM6IlipfPOloVnD7u9QtNO2PjXsPjzurX2BFw4pxR/npd/+0i35c60cMVpU1StW6LbC7evPqDvr//WuF3LC/NZMbMkY4/RZEM6LwJ3WrfvBF6I
3UH0Mv/jwC6l1N9P8vVSoz1oxWZd0PJ2IgIVzMrXYYrQ4Ig+xeyNaQg2Y1mkxUIsfe26LbI3w2vwJvDwl1QXUV3sIzQ4ovu7dxzWA73d6OEXVmrB/9XX9P/2D36QUjdQX66Xy+aXExocwZ/npcLtKZk2IuEmaux9Dfa/Dp/7y4TtSNYvriQ0OAKQcbMCJiv43wWuEpF9wFXWfUSkRkResfa5FPhjYJ2IbLMu18d/uiniZIP7wzk2/nJ8Ayc5d6b20OvCHn6U4Fct09dNcVol262RM528AOQUJByCIiLhfjp6wfag/oMbBT9QoRchT2zXYn8K3UDXWccoa1Iybcrm6/TlVx+AsgVwwV0Jd7U/Rx6B2RnSJdNmUoKvlGpTSq1XSi2wrk9a2xuVUtdbt3+vlBKl1LlKqZXW5ZXkzzxJ2t2fgx8mUA6h1vAXde70PN0/P7qYKCz4ccI6bmirYGPn4idgvXWMwgu24E7BL6zS1ytugyWfP6WHhn8UM6g/zJRQNl+f9Z08ANd+N+l6xzk106ialk/t9ALycjKrdjXDz+PjMDoC3U3uT8m0CVTAkQ+5ec0sgq0hVpWPWNujYviBMh2njJea6SbBT1Bta3Pp/HL+6LyZXH1OFXx0EPKLI1PE3MS8dXDhn8HaB075oVXTfPz5uvmsrnPJZyJV7IXbhdfCgg1JdxUR7rt6EX1WWCeTcJ/ge7y63/3IwMT7ugHLq62Zlsc/3roqIuqx7Y6rlsXP1OnvCPfez3j8ZUmnXvlyvXzvS9YgmZMHx05DcxNFVXDd3572w++7etEUGpMh1F+mB+tc+zcp7X7zmszs0ZVZ5yOp4s1JPLPWbQQq9OKjnW1jC15s3LZikV6Uiq4mBHd5+BOEdMbg1pRMw+lRPFMP1nH5Z8Kdgp9N2MJuhzLs69iGYGXz9ECY6GpCpdwl+P5yCKUg+CNDOl7r8i+3wRCLEfxMxw7d2JWVtuDHhnTsfPPoJlGDPTA67B7BD5TBUEg3vkpG5xH9vo3k8QfZAAAJMUlEQVTgG7IMI/iZju3J24Lf26rnAMSKuC1uJ6MEP1x05YK0TJgwFz+MmzN0DIYkGMHPdGzBt2PXoVYoKNV9dKKZVgs5vojYgXvaKtj4k1fbhnFjl0yDIQWM4Gc6/lLGtMONrbK18Xh0MVqbiwU/SQO1MZw8CLkBd/XBNxhSwAh+puPxatGPjuEnmuBUOjd+SMctgh9ez0hB8EvnujMl02BIghF8NxCoGCv4/gRDucvm6nCGnZrpOsG3iqgmDOkczJ7CPIMhCiP4biBQEfFqE4V0QHu1IwPQdUzfDwu+SxZtfSV68EuyRdvREd1cz8TvDVmIEXw34C/THv7IsBbx2JRMGzs101647WvXDcdyM2cmZ1I8Hn0sOo/A8GD8fbqO6fmuRvANWYgRfDdgh3TsxcpEHn6ZLfhWHL+vwz3hHJviWvj0OXh4Bvy/8+CZ2+Do5sjfTUqmIYtxXy+dbCRQoXvi9Jyw7icQ/KIanZppF1+5qcrW5panIfiuHtretg8OvQfPfhn+2/s6xm8E35DFGMF3A7bAt+zR14lCOnZqpp2H7koPfyasiJrudOJTePRKeOnrcPNTWvBzfLp7qMGQZZiQjhuwBb9519j78SibFxXSccnwk2TMWA7r/zfsegm2/TwyHCe2MM1gyALMp94N2Hn3Lbv1dSIPH3Q6op2amQ2CD3Dxf9etb//9L+DYFhPOMWQtkxJ8ESkVkddFZJ91PS4+ICI+EdkkIp+IyGci8tBkXtMQB1vwm3cBknyoR+m8SGqmG2P48fB44Qs/0imb3cdNDr4ha5msh38/8IZSagHwhnU/lgFgnVJqBbASuFZELprk6xqisQut2oNa7D3exPvamTrNO/Xc02wQfICSWXDD9/Xt8oXptcVgSBOTXbS9EVhr3f4J8Bbwl9E7KKUU0GPdzbUuapKva4jGVwKeHN3yN1k4ByLhjGNb9HW2CD7AuV/Si7o1q9JticGQ
Fibr4VcppY4DWNdxu1GJiFdEtgHNwOtKqQ8TPaGI3CUim0Vkc0tLyyTNyxI8nojQJ1uwhUhqpp2bnk2CD1B3MeT60m2FwZAWJhR8EfmdiOyIc7kx1RdRSo0opVYCM4ELRGRZkn0fVUqtUUqtqahI0ATMMB5b6BP10bHxeLSXn40evsGQ5UwY0lFKJRzhLiJNIlKtlDouItVoDz7Zc3WIyFvAtUCcidqG0yaQoocPWvCbd+rbRvANhqxhsiGdF4E7rdt3Ai/E7iAiFSJSYt0uADYAuyf5uoZY7EydRK2Ro4lOSzSCbzBkDZMV/O8CV4nIPuAq6z4iUiMir1j7VANvish24CN0DP/lSb6uIRZb6CdatIWxgu+W8YYGg2FCJpWlo5RqA9bH2d4IXG/d3g6YtIgzjR27D0wQw4dIaqZ4Ib/ozNlkMBgcham0dQun5OFbgl8w3Ux9MhiyCCP4bmHGcj2ntXzBxPsWVevUTBO/NxiyCtMt0y3UroYHG1Pb107NzAucWZsMBoOjMIKfray9X8fwDQZD1mAEP1tZmnLdnMFgcAkmhm8wGAxZghF8g8FgyBKM4BsMBkOWYATfYDAYsgQj+AaDwZAlGME3GAyGLMEIvsFgMGQJRvANBoMhSxA9ctaZiEgLcOg0H14OtE6hOVOFU+0C59rmVLvAubY51S5wrm1OtQtOzbY6pVTcwRiOFvzJICKblVJr0m1HLE61C5xrm1PtAufa5lS7wLm2OdUumDrbTEjHYDAYsgQj+AaDwZAluFnwH023AQlwql3gXNucahc41zan2gXOtc2pdsEU2ebaGL7BYDAYxuJmD99gMBgMURjBNxgMhizBdYIvIteKyB4R2S8i96fZlh+LSLOI7IjaVioir4vIPuv6rA+WFZFZIvKmiOwSkc9E5B4H2eYTkU0i8oll20NOsc2ywysiH4vIyw6zKygin4rINhHZ7BTbRKRERH4hIrutz9vFDrFrkXWs7EuXiHzDIbbda332d4jIM9Z3YkrscpXgi4gX+CFwHbAUuE1ElqbRpCeBa2O23Q+8oZRaALxh3T/bDAP3KaWWABcBd1vHyQm2DQDrlFIrgJXAtSJykUNsA7gH2BV13yl2AVyplFoZla/tBNt+ALyqlFoMrEAfu7TbpZTaYx2rlcB5QC/wq3TbJiK1wNeBNUqpZYAXuHXK7FJKueYCXAy8FnX/AeCBNNtUD+yIur8HqLZuVwN7HHDcXgCucpptgB/YClzoBNuAmdaXbR3wspP+n0AQKI/ZllbbgGlAA1ZyiFPsimPn1cC7TrANqAWOAKXoEbQvW/ZNiV2u8vCJHCybo9Y2J1GllDoOYF1XptMYEakHVgEf4hDbrLDJNqAZeF0p5RTb/hH4C2A0apsT7AJQwG9FZIuI3OUQ2+YCLcATVhjsMREJOMCuWG4FnrFup9U2pdQx4HvAYeA40KmU+u1U2eU2wZc420zeaQJEpBB4HviGUqor3fbYKKVGlD7VnglcICLL0m2TiHweaFZKbUm3LQm4VCm1Gh3OvFtErki3QWgPdTXwI6XUKiBEekNe4xCRPOAPgefSbQuAFZu/EZgD1AABEbljqp7fbYJ/FJgVdX8m0JgmWxLRJCLVANZ1czqMEJFctNj/TCn1SyfZZqOU6gDeQq+DpNu2S4E/FJEg8CywTkSedoBdACilGq3rZnQs+gIH2HYUOGqdoQH8Av0DkG67orkO2KqUarLup9u2DUCDUqpFKTUE/BK4ZKrscpvgfwQsEJE51i/3rcCLabYplheBO63bd6Lj52cVERHgcWCXUurvHWZbhYiUWLcL0F+A3em2TSn1gFJqplKqHv252qiUuiPddgGISEBEiuzb6JjvjnTbppQ6ARwRkUXWpvXAznTbFcNtRMI5kH7bDgMXiYjf+p6uRy90T41d6VwsOUOLHtcDe4EDwINptuUZdBxuCO3tfBUoQy/87bOuS9Ng12XoUNd2YJt1ud4htp0LfGzZtgP4P9b2
tNsWZeNaIou2abcLHSv/xLp8Zn/uHWLbSmCz9f/8NTDdCXZZtvmBNqA4alvabQMeQjs5O4CngPypssu0VjAYDIYswW0hHYPBYDAkwAi+wWAwZAlG8A0GgyFLMIJvMBgMWYIRfIPBYMgSjOAbDAZDlmAE32AwGLKE/w+Tf3mtPM7xRQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dfXRV9Z3v8ff3nDw/ESDkgQQFahSBarCRMuJ4sU+COkN7b52rq6Pt1DvqjI4dnZlK2zW3dM0f09vV6fS6xurVGVtd16m11zrDbentqFWpbVUiIhIBQUQJBAhQkvCQkIfv/ePshMMhkJPHfZL9ea111t77t3+/fX6/KOdz9sPZ29wdERGJnljYHRARkXAoAEREIkoBICISUQoAEZGIUgCIiERUVtgdGIqysjKfPXt22N0QEZlQXn/99YPuPiO1fEIFwOzZs2loaAi7GyIiE4qZvT9QuQ4BiYhElAJARCSiFAAiIhE1oc4BiEjm6urqoqmpiY6OjrC7Ell5eXnU1NSQnZ2dVn0FgIiMiqamJoqLi5k9ezZmFnZ3IsfdOXToEE1NTcyZMyetNjoEJCKjoqOjg+nTp+vDPyRmxvTp04e0B6YAEJFRow//cA317x+JAPjl1v1878UdYXdDRCSjRCIAfrX9IN974d2wuyEiY+jQoUPU1dVRV1dHZWUl1dXV/csnT548Z9uGhgbuvvvuQd/jiiuuGJW+vvjii1x//fWjsq2RiMRJ4IqSPI52dnO0s5ui3EgMWSRypk+fzsaNGwFYvXo1RUVF/PVf/3X/+u7ubrKyBv73X19fT319/aDv8Zvf/GZ0OpshIrEHUFGSC8CBNl2eJhIlX/jCF7j33nu5+uqrue+++3jttde44oorWLRoEVdccQXbtm0DTv9Gvnr1ar74xS+ybNky5s6dy/3339+/vaKiov76y5Yt47Of/Szz5s3jc5/7HH1PV1y7di3z5s3jyiuv5O677x70m/7hw4f59Kc/zSWXXMKSJUvYtGkTAC+99FL/HsyiRYtob2+nubmZq666irq6OhYuXMivfvWrEf19IvF1uKI4D4D9bZ3MnVEUcm9EJr9v/N9G3t7bNqrbnD+zhK//wYIht3vnnXd47rnniMfjtLW1sW7dOrKysnjuuef46le/ytNPP31Gm61bt/LCCy/Q3t7ORRddxJ/92Z+dcW39G2+8QWNjIzNnzmTp0qX8+te/pr6+nttvv51169YxZ84cbrrppkH79/Wvf51Fixbxb//2b/zyl7/klltuYePGjXz729/mgQceYOnSpRw9epS8vDwefvhhrrnmGr72ta/R09PD8ePHh/z3SJbWHoCZLTezbWa2w8xWDbDezOz+YP0mM7ssKM8zs9fM7E0zazSzbyS1WW1me8xsY/C6dkQjOYfykkQAHGjXHoBI1Nxwww3E43EAWltbueGGG1i4cCH33HMPjY2NA7a57rrryM3NpaysjPLycvbv339GncWLF1NTU0MsFqOuro5du3axdetW5s6d238dfjoB8PLLL3PzzTcD8LGPfYxDhw7R2trK0qVLuffee7n//vs5cuQIWVlZXH755Xz/+99n9erVvPXWWxQXFw/3zwKksQdgZnHgAeCTQBOw3szWuPvbSdVWALXB66PAg8G0E/iYux81s2zgZTP7ubu/ErT7R3f/9ohGkIa+Q0D7dQhIZFwM55v6WCksLOyf/9u//VuuvvpqnnnmGXbt2sWyZcsGbJObm9s/H4/H6e7uTqtO32GgoRiojZmxatUqrrvuOtauXcuSJUt47rnnuOqqq1i3bh0/+9nPuPnmm/mbv/kbbrnlliG/Z5909gAWAzvcfae7nwSeBFam1FkJPO4JrwClZlYVLB8N6mQHr6H/hUaoKDeLgpw4B9o6x/utRSSDtLa2Ul1dDcAPfvCDUd/+vHnz2LlzJ7t27QLgRz/60aBtrrrqKp544gkg
cW6hrKyMkpIS3n33XT784Q9z3333UV9fz9atW3n//fcpLy/nT//0T7n11lvZsGHDiPqbTgBUA7uTlpuCsrTqmFnczDYCB4Bn3f3VpHp3BYeMHjWzqQO9uZndZmYNZtbQ0tKSRncH3Ablxbnsb1cAiETZl7/8Zb7yla+wdOlSenp6Rn37+fn5fO9732P58uVceeWVVFRUMGXKlHO2Wb16NQ0NDVxyySWsWrWKxx57DIDvfve7LFy4kEsvvZT8/HxWrFjBiy++2H9S+Omnn+ZLX/rSiPprg+2ymNkNwDXu/t+C5ZuBxe7+F0l1fgb8vbu/HCw/D3zZ3V9PqlMKPAP8hbtvNrMK4CCJPYK/A6rc/Yvn6kt9fb0P94Ewf/S/fgvAU7f/3rDai8i5bdmyhYsvvjjsboTu6NGjFBUV4e7ceeed1NbWcs8994zb+w/038HMXnf3M65zTWcPoAmYlbRcA+wdah13PwK8CCwPlve7e4+79wKPkDjUNGYqSvJ0GaiIjLlHHnmEuro6FixYQGtrK7fffnvYXTqrdAJgPVBrZnPMLAe4EViTUmcNcEtwNdASoNXdm81sRvDNHzPLBz4BbA2Wq5LafwbYPMKxnFNFcS772zqHdZJGRCRd99xzDxs3buTtt9/miSeeoKCgIOwundWgVwG5e7eZ3QX8AogDj7p7o5ndEax/CFgLXAvsAI4DfxI0rwIeC64kigFPuftPg3XfMrM6EoeAdgFjGpMVJXmc6OqhvbObkrz07pUtIkPj7rohXIiG+gU3rR+CuftaEh/yyWUPJc07cOcA7TYBi86yzZuH1NMRKu//NXCnAkBkDOTl5XHo0CHdEjokfc8DyMvLS7tNJH4JDIk9AEjcDuKCcv0aWGS01dTU0NTUxHCv1pOR63siWLoiFwD79WtgkTGRnZ2d9pOoJDNE4mZwAOXFfb8G1m8BREQgQgFQmJtFUW6WbgchIhKITABA4kSwbgchIpIQqQCoLMljn/YARESAiAXAzNJ89vzuRNjdEBHJCJEKgOrSfPa3d3CyuzfsroiIhC5aATA1H3fY16rDQCIikQqAmtJ8AJqOjOwxaiIik0GkAqB6aiIAdB5ARCRiAVA1JR8z2HNEASAiEqkAyMmKUV6cqz0AEREiFgCQuBKoSQEgIhK9AKiZWqBDQCIiRDAAqqfm09x6gt5ePRlMRKItegFQmk9Xj3OgXfcEEpFoi14A9F0Kqt8CiEjERS4A+n8MphPBIhJxkQuAU3sACgARibbIBUBBThbTCnPYfVgBICLRFrkAADh/egEfHD4WdjdEREIVyQCYPb2QXQd1ElhEoi2yAbC39QQdXT1hd0VEJDTRDICyAtzhg8PaCxCR6IpmAEwvBGDXQZ0HEJHoinYAHFIAiEh0RTIAphRkM7Ugm/d0IlhEIiySAQAwu6xQh4BEJNLSCgAzW25m28xsh5mtGmC9mdn9wfpNZnZZUJ5nZq+Z2Ztm1mhm30hqM83MnjWz7cF06ugNa3Bzy4p4t+XoeL6liEhGGTQAzCwOPACsAOYDN5nZ/JRqK4Da4HUb8GBQ3gl8zN0vBeqA5Wa2JFi3Cnje3WuB54PlcVNbUcSB9k5aj3eN59uKiGSMdPYAFgM73H2nu58EngRWptRZCTzuCa8ApWZWFSz3fc3ODl6e1OaxYP4x4NMjGchQ1ZYXAbCjpX0831ZEJGOkEwDVwO6k5aagLK06ZhY3s43AAeBZd381qFPh7s0AwbR8oDc3s9vMrMHMGlpaWtLobnourCgG4J39OgwkItGUTgDYAGWpj9M6ax1373H3OqAGWGxmC4fSQXd/2N3r3b1+xowZQ2l6TtWl+eRlx9iuABCRiEonAJqAWUnLNcDeodZx9yPAi8DyoGi/mVUBBNMDafd6FMRixgXlRWw/oENAIhJN6QTAeqDWzOaYWQ5wI7Ampc4a4JbgaqAlQKu7N5vZDDMrBTCzfOATwNakNp8P5j8P/PsIxzJkteXF2gMQkcga
NADcvRu4C/gFsAV4yt0bzewOM7sjqLYW2AnsAB4B/jworwJeMLNNJILkWXf/abDum8AnzWw78MlgeVxdWFHMvrYOjhw/Od5vLSISuqx0Krn7WhIf8sllDyXNO3DnAO02AYvOss1DwMeH0tnRNn9mCQBv723jigvKwuyKiMi4i+wvgQEWBAHQuLct5J6IiIy/SAdAWVEuFSW5NO5tDbsrIiLjLtIBALBw5hTtAYhIJEU+ABbMLOHdlqOcOKmng4lItEQ+AObPnEKvw5Z92gsQkWiJfAAsOq8UgDc+OBJyT0RExlfkA6CiJI/q0nw2vP+7sLsiIjKuIh8AAJedP5UNHygARCRaFADAZeeV0tzawd4jJ8LuiojIuFEAAB85P/Ewstd1GEhEIkQBAFxcVUJhTpzX3jscdldERMaNAgDIjsdYPGcav95xMOyuiIiMGwVAYOkFZew8eEznAUQkMhQAgaXB3UC1FyAiUaEACFxUUUxZUQ4vKwBEJCIUAIFYzPhPF5bz4rYWunp6w+6OiMiYUwAk+eT8ClpPdLF+l64GEpHJTwGQ5KoLy8jNivEfjfvD7oqIyJhTACQpyMniygvKePbt/fT2etjdEREZUwqAFH9w6Uz2HDmhw0AiMukpAFJ8akEFhTlxfrJhT9hdEREZUwqAFAU5WSxfWMXat5rp6NJTwkRk8lIADOC/XFZNe2c3z76tk8EiMnkpAAawZO50qqbk8X9ebwq7KyIiY0YBMIBYzPjsR2pYt72FPbo3kIhMUgqAs/ij+lkAPLV+d8g9EREZGwqAs5g1rYArLyjjxw276dFvAkRkElIAnMNNi89jb2sH695pCbsrIiKjTgFwDp+4uILphTn88LUPwu6KiMioUwCcQ05WjM9+pIbntx7gQFtH2N0RERlVaQWAmS03s21mtsPMVg2w3szs/mD9JjO7LCifZWYvmNkWM2s0sy8ltVltZnvMbGPwunb0hjV6/uvls+jpdX6sS0JFZJIZNADMLA48AKwA5gM3mdn8lGorgNrgdRvwYFDeDfyVu18MLAHuTGn7j+5eF7zWjmwoY2PujCKWzJ3Gk+s/0A3iRGRSSWcPYDGww913uvtJ4ElgZUqdlcDjnvAKUGpmVe7e7O4bANy9HdgCVI9i/8fFTYvPY/fhE/zm3UNhd0VEZNSkEwDVQPLF8E2c+SE+aB0zmw0sAl5NKr4rOGT0qJlNHejNzew2M2sws4aWlnCuxrlmQSWlBdn8cL1OBovI5JFOANgAZanHQs5Zx8yKgKeBv3T3tqD4QeBDQB3QDPzDQG/u7g+7e72718+YMSON7o6+vOw4n1lUzX807qP1RFcofRARGW3pBEATMCtpuQbYm24dM8sm8eH/hLv/pK+Cu+939x537wUeIXGoKWNdf8lMunqcX27VDeJEZHJIJwDWA7VmNsfMcoAbgTUpddYAtwRXAy0BWt292cwM+Bdgi7t/J7mBmVUlLX4G2DzsUYyDRbNKqSjJ5edv7Qu7KyIioyJrsAru3m1mdwG/AOLAo+7eaGZ3BOsfAtYC1wI7gOPAnwTNlwI3A2+Z2cag7KvBFT/fMrM6EoeKdgG3j9qoxkAsZixfUMmT63dzrLObwtxB/3QiIhktrU+x4AN7bUrZQ0nzDtw5QLuXGfj8AO5+85B6mgGuWVjJY799n5feaeHaD1cN3kBEJIPpl8BDsHj2NKYV5vDzzToMJCITnwJgCLLiMT4+r5yXth2gu6c37O6IiIyIAmCIrrpwBm0d3Wza0xp2V0RERkQBMERLLyjDDF7efjDsroiIjIgCYIimFeawcOYUfrVdzwgQkYlNATAMv19bxhsfHKG9Q78KFpGJSwEwDFfWltHd67yy83DYXRERGTYFwDB85Pyp5GfHeVmHgURkAlMADENuVpz62VN59T3tAYjIxKUAGKbLZ09j2/523R1URCYsBcAw1c+eijts+OB3YXdFRGRYFADDVDerlKyY0bBLh4FEZGJSAAxTQU4W
C6qnsH6X9gBEZGJSAIzA5edP5c3dR+js7gm7KyIiQ6YAGIH62dPo7O5ls+4LJCITkAJgBOpnJ55jr8NAIjIRKQBGoKwol/OmFfDm7iNhd0VEZMgUACN06axSBYCITEgKgBG6tGYKe1s7ONDeEXZXRESGRAEwQnWzSgHYtFsngkVkYlEAjNCCmVOIx4w3m3QYSEQmFgXACOXnxLmoopiNOg8gIhOMAmAU9J0IdvewuyIikjYFwCi4tGYKbR3d7Dp0POyuiIikTQEwCi6pCU4E6zyAiEwgCoBRUFtRRE5WjMa9bWF3RUQkbQqAUZAdjzGvspjGvboUVEQmDgXAKFkws4TGvW06ESwiE4YCYJTMnzmFI8e72HPkRNhdERFJS1oBYGbLzWybme0ws1UDrDczuz9Yv8nMLgvKZ5nZC2a2xcwazexLSW2mmdmzZrY9mE4dvWGNv4UzSwB0HkBEJoxBA8DM4sADwApgPnCTmc1PqbYCqA1etwEPBuXdwF+5+8XAEuDOpLargOfdvRZ4PliesOZVlhAzBYCITBzp7AEsBna4+053Pwk8CaxMqbMSeNwTXgFKzazK3ZvdfQOAu7cDW4DqpDaPBfOPAZ8e4VhClZ8T50MzinhbJ4JFZIJIJwCqgd1Jy02c+hBPu46ZzQYWAa8GRRXu3gwQTMsHenMzu83MGsysoaWlJY3uhmfBzBI279EegIhMDOkEgA1QlnqpyznrmFkR8DTwl+4+pE9Id3/Y3evdvX7GjBlDaTruFlZPYV9bB4eOdobdFRGRQaUTAE3ArKTlGmBvunXMLJvEh/8T7v6TpDr7zawqqFMFHBha1zPPfJ0IFpEJJJ0AWA/UmtkcM8sBbgTWpNRZA9wSXA20BGh192YzM+BfgC3u/p0B2nw+mP888O/DHkWGWFA1BVAAiMjEkDVYBXfvNrO7gF8AceBRd280szuC9Q8Ba4FrgR3AceBPguZLgZuBt8xsY1D2VXdfC3wTeMrMbgU+AG4YvWGFY0pBNjVT89msE8EiMgEMGgAAwQf22pSyh5LmHbhzgHYvM/D5Adz9EPDxoXR2Ilgws4QtzdoDEJHMp18Cj7J5lSXsOniMjq6esLsiInJOCoBRdnFVMb0O2/cfDbsrIiLnpAAYZRdVJq4E2rJPh4FEJLMpAEbZedMKyM+Os7W5PeyuiIickwJglMVjxoWVxWzVHoCIZDgFwBi4uLKYLc16NoCIZDYFwBiYV1nM74530dKuW0KISOZSAIyBvhPBW/fpPICIZC4FwBiYV1kMoPMAIpLRFABjYGphDpUleboSSEQymgJgjMyrKmaLDgGJSAZTAIyReZUl7DjQTldPb9hdEREZkAJgjMyrLKarx9nZcizsroiIDEgBMEbmVelEsIhkNgXAGJlbVkR23HQpqIhkLAXAGMnJivGhGUVs1bMBRCRDKQDG0MVVJdoDEJGMpQAYQxdVFtPc2kHr8a6wuyIicgYFwBjSL4JFJJMpAMbQxVXBw2F0HkBEMpACYAyVF+cytSBb5wFEJCMpAMaQmXFxVYluCSEiGUkBMMbmVZawbV8bPb16OIyIZBYFwBibV1VMR1cv7x/SLSFEJLMoAMbY/P4TwToMJCKZRQEwxi4oLyIeM10KKiIZRwEwxvKy48wtK9QegIhkHAXAOJhXVaLfAohIxlEAjIN5lcXsOXKCtg7dEkJEMkdaAWBmy81sm5ntMLNVA6w3M7s/WL/JzC5LWveomR0ws80pbVab2R4z2xi8rh35cDJT34lgPSNYRDLJoAFgZnHgAWAFMB+4yczmp1RbAdQGr9uAB5PW/QBYfpbN/6O71wWvtUPs+4Shh8OISCZKZw9gMbDD3Xe6+0ngSWBlSp2VwOOe8ApQamZVAO6+Djg8mp2eaCpL8igtyNaJYBHJKOkEQDWwO2m5KSgbap2B3BUcMnrUzKYOVMHMbjOzBjNraGlpSWOTmcfMmFdZrBPBIpJR0gkAG6As9b4G6dRJ9SDwIaAO
aAb+YaBK7v6wu9e7e/2MGTMG62vGStwSop1e3RJCRDJEOgHQBMxKWq4B9g6jzmncfb+797h7L/AIiUNNk9b8qhJOdPXw/uHjYXdFRARILwDWA7VmNsfMcoAbgTUpddYAtwRXAy0BWt29+Vwb7TtHEPgMsPlsdSeD/hPBOgwkIhli0ABw927gLuAXwBbgKXdvNLM7zOyOoNpaYCewg8S3+T/va29mPwR+C1xkZk1mdmuw6ltm9paZbQKuBu4ZrUFlogsriokZujW0iGSMrHQqBZdork0peyhp3oE7z9L2prOU35x+Nye+vOw4c8oKdSJYRDKGfgk8juZVlei3ACKSMRQA42h+VQm7D+uWECKSGRQA42jBzMQtITY3tYbcExERBcC4urSmFIA3FQAikgEUAONoamEO500rYFPTkbC7IiKiABhvl9RM4c3dCgARCZ8CYJzVzSplb2sHLe2dYXdFRCJOATDOLgnOA+gwkIiETQEwzhZWlxAzdBhIREKnABhnBTlZ1JYX60ogEQmdAiAEl86awptNR3RraBEJlQIgBIvnTOfI8S7eOaAbw4lIeBQAIfjonGkAvPLuoZB7IiJRpgAIwaxpBVSX5vPqe5F+VLKIhEwBEJKPzp3Gq+8dJnEnbRGR8acACMnvzZ3O4WMneVvPBxCRkCgAQrLsonLM4PktB8LuiohElAIgJDOKc1k0q5TntuwPuysiElEKgBB9Yn4Fm5pa2dfaEXZXRCSCFAAh+tT8SgB+umlvyD0RkShSAITogvIiPnL+VJ549QP9KlhExp0CIGR/vOQ83jt4jN/oR2EiMs4UACFbsbCKsqIc/umF7fpNgIiMKwVAyPKy49z98Vpe2XmYH7/edMZ6d+ed/e28uvMQOw4c1aEiERk1WWF3QOBzHz2fn7+1j6898xbHOru54kNlvHfwKC+9c5AXth5gX9upq4SmFeZw/SVVfGZRNXWzSjGzEHsuIhOZTaTDDvX19d7Q0BB2N8ZE64ku7vrXDfxq+8H+sqLcLH6/toyrLypnZmk+e1tP8NI7LTz39n46u3uZU1bIH146kytry/hw9RTysuMhjkBEMpWZve7u9WeUKwAyh7vzZlMr7x86xnnTClgwcwo5WWcepWvr6OL/bd7HMxv28Mp7h3AHMygrymXmlDxK8rMpzsuiMCeLwtwsinL7pnGKgvK+sr71RXlZFGTHicW0RyEy2SgAJqnDx07SsOswjXvbaG49QXNrB20d3RzrTLyOBtN0Tx0U5sRPC43C3DhFudkU5aaWZ1F8Wp3UsMkaMLxEZPydLQB0DmCCm1aYw6cWVPKpBZVnrePunOjqCcKgpz8YjnZ0c+zkqZA42reuo5ujJ0+FyJ4jJ0616ezmZHdvWn3LicfIz4mTlx0jLztOblZimpcVJzc7Rm7WAOuyY/3rB6qb3CYvO05OVoycePDKSrzi2osRSYsCIALMjIKcLApysqB45Nvr6ulN2rvo6Q+G5D2OvkA5frKbzq5eOrp76OjqobO7l46uHto7ujnYfZLOpLK+9d0jvNIpZpwKhmCanbScHUxz++ZT1ufErT9MspO20bccjxnZcSMrFiM7bsRjMbLiRnYsaV08RlbMyDqtnpHdX35qfXYspkNvEoq0AsDMlgP/E4gD/+zu30xZb8H6a4HjwBfcfUOw7lHgeuCAuy9MajMN+BEwG9gF/JG7/26E45FxkB2PUVqQQ2lBzphsv7unl47uXjq7eugIwiE1RDq7eujo6uVkdy+dPb10dfdysiex3BVMO5PmT/acWX6ssztR3u39bfunQZ2RhlG6zCA7CJLkgMiOJ8riZsRiSdMYZ5ZZImQS8yTmTyuzpLKU9cH0tPVnvGdi3gxiZsQs8eXi1LRv/vTlWNDGkpYTeZe0HEusN/rWD7T907dhKdNT8yn9ISiPJbUltT8Gp7U91c6S2iSPZTIYNADMLA48AHwSaALWm9kad387qdoKoDZ4fRR4MJgC/AD4J+DxlE2v
Ap5392+a2apg+b7hD0Umi6x4jKJ4jKLc8HdQe3o9ERxJ4dLd43T3Ot1BQHT3OF29feWpU+9v09ObXO9U+66e3uB9krYZ1OsKttXT6/R6Yhs9vfTPnypL1OvsdnocentT1rsnytzp7eWMsp7elPVBmZydnRYmpwfYQOuSw6Uv/CwlkFJDLbn93//nD3P57GmjOoZ0/oUtBna4+87EoO1JYCWQHAArgcc9cUb5FTMrNbMqd29293VmNnuA7a4ElgXzjwEvogCQDJP4VhyP7CW2pwVEMHXAgxDq9cRyrzvufWWJ807Jy33rPWn5VNmp5f62wXv3JrVxkreVCKpT7336+6TVH4Jt954q47SxJN4zud+e0vb05dP7nvx36S9P2jb0jeHUts+sd+q9CnJG///BdAKgGtidtNzEqW/356pTDTSfY7sV7t4M4O7NZlY+UCUzuw24DeC8885Lo7siMlpiMSOGEdH8m/TSuU5voINdqfuG6dQZFnd/2N3r3b1+xowZo7FJEREhvQBoAmYlLdcAqTewT6dOqv1mVgUQTPVsRBGRcZROAKwHas1sjpnlADcCa1LqrAFusYQlQGvf4Z1zWAN8Ppj/PPDvQ+i3iIiM0KAB4O7dwF3AL4AtwFPu3mhmd5jZHUG1tcBOYAfwCPDnfe3N7IfAb4GLzKzJzG4NVn0T+KSZbSdxhdFpl5aKiMjY0q0gREQmubPdCkI3axERiSgFgIhIRCkAREQiakKdAzCzFuD9YTYvAw4OWmty0ZijQWOOhpGM+Xx3P+OHVBMqAEbCzBoGOgkymWnM0aAxR8NYjFmHgEREIkoBICISUVEKgIfD7kAINOZo0JijYdTHHJlzACIicroo7QGIiEgSBYCISERFIgDMbLmZbTOzHcHjJycFM3vUzA6Y2eaksmlm9qyZbQ+mU5PWfSX4G2wzs2vC6fXwmdksM3vBzLaYWaOZfSkon8xjzjOz18zszWDM3wjKJ+2Y+5hZ3MzeMLOfBsuTesxmtsvM3jKzjWbWEJSN7Zg9eJzaZH2ReJD9u8BcIAd4E5gfdr9GaWxXAZcBm5PKvgWsCuZXAf8jmJ8fjD0XmBP8TeJhj2GI460CLgvmi4F3gnFN5jEbUBTMZwOvAksm85iTxg8bsicAAAJESURBVH4v8K/AT4PlST1mYBdQllI2pmOOwh5A/zON3f0k0PdM4wnP3dcBh1OKV5J4xjLB9NNJ5U+6e6e7v0fi1t2Lx6Wjo8QTz5jeEMy3k7g9eTWTe8zu7keDxezg5UziMQOYWQ1wHfDPScWTesxnMaZjjkIAnO15xZPVac9aBvqetTyp/g5mNhtYROIb8aQec3AoZCOJp+Y96+6TfszAd4EvA71JZZN9zA78h5m9HjwLHcZ4zOk8FH6iG7PnFU8wk+bvYGZFwNPAX7p7m9lAQ0tUHaBswo3Z3XuAOjMrBZ4xs4XnqD7hx2xm1wMH3P11M1uWTpMByibUmANL3X2vmZUDz5rZ1nPUHZUxR2EPYDjPK57Izvas5UnxdzCzbBIf/k+4+0+C4kk95j7ufgR4EVjO5B7zUuAPzWwXiUO2HzOz/83kHjPuvjeYHgCeIXFIZ0zHHIUASOeZxpPJ2Z61vAa40cxyzWwOUAu8FkL/hs0SX/X/Bdji7t9JWjWZxzwj+OaPmeUDnwC2MonH7O5fcfcad59N4t/rL939j5nEYzazQjMr7psHPgVsZqzHHPaZ73E6u34tiStG3gW+FnZ/RnFcPwSagS4S3whuBaYDzwPbg+m0pPpfC/4G24AVYfd/GOO9ksRu7iZgY/C6dpKP+RLgjWDMm4H/HpRP2jGnjH8Zp64CmrRjJnGV4pvBq7Hvc2qsx6xbQYiIRFQUDgGJiMgAFAAiIhGlABARiSgFgIhIRCkAREQiSgEgIhJRCgARkYj6/+JniYRvPS7kAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Train LSTM_RNN on synthetic autoregressive data (ARData) with the Adam optimizer,\n",
    "# then plot the last epoch's predictions against the targets and the loss history.\n",
    "# NOTE(review): ARData, fixed_ar_coefficients, LSTM_RNN, Adam and np are not imported\n",
    "# by name in this cell; presumably they come from the wildcard imports below or from\n",
    "# earlier cells - confirm before running on a fresh kernel.\n",
    "import util\n",
    "from train import *\n",
    "from generate_data import *\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# Passed to ARData as num_prev and reused below as the LSTM input width.\n",
    "input_size = 20\n",
    "\n",
    "# Data params\n",
    "noise_var = 0\n",
    "num_datapoints = 100\n",
    "test_size = 0.2\n",
    "num_train = int((1-test_size) * num_datapoints)\n",
    "\n",
    "data = ARData(num_datapoints, num_prev=input_size, test_size=test_size, noise_var=noise_var, \\\n",
    "              coeffs=fixed_ar_coefficients[input_size])\n",
    "X_train =data.X_train\n",
    "y_train =data.y_train\n",
    "\n",
    "# Model hyper-parameters.\n",
    "hidden_size = 32\n",
    "lstm_input_size = input_size\n",
    "output_dim = 1\n",
    "num_layers = 2\n",
    "# The whole training set is fed as a single batch.\n",
    "batch_size =num_train #80\n",
    "\n",
    "# Reshape to (input_size, batch_size, 1); the second reshape only succeeds when the\n",
    "# inferred middle dimension already equals batch_size. swapaxes(0,2) then yields\n",
    "# (1, batch_size, input_size).\n",
    "# NOTE(review): reshape reinterprets the flat memory order, it does not transpose.\n",
    "# If data.X_train is laid out as (num_train, input_size), values from different\n",
    "# samples end up in the same feature row - confirm against ARData.\n",
    "X_train = X_train.reshape(input_size, -1, 1)\n",
    "print(X_train.shape)\n",
    "X_train = X_train.reshape(len(X_train), batch_size, 1)\n",
    "print(X_train.shape)\n",
    "X_train = X_train.swapaxes(0,2)\n",
    "# Targets become a column with one row per sample.\n",
    "y_train = y_train.reshape(-1,1)\n",
    "\n",
    "model = LSTM_RNN(lstm_input_size, hidden_size,  output_size=output_dim, num_layers=num_layers)\n",
    "\n",
    "# loss_fn is called below as loss_fn(y_pred, y_train) and returns a (loss, grad) pair.\n",
    "loss_fn = util.mse_loss_grad#(f,y)#torch.nn.MSELoss(size_average=False)\n",
    "\n",
    "learning_rate = 1e-3\n",
    "# momentum is only referenced by the commented-out SGD optimizer below.\n",
    "momentum = 0.9\n",
    "#optimizer = SGD(model.parameters(),learning_rate,momentum)\n",
    "optimizer = Adam(model.parameters(),learning_rate)\n",
    "num_epochs = 500\n",
    "\n",
    "print(X_train.shape)\n",
    "# hist[t] stores the training loss of epoch t.\n",
    "hist = np.zeros(num_epochs)\n",
    "for t in range(num_epochs):    \n",
    "    # Re-initialise the hidden state before each pass over the full batch.\n",
    "    model.hidden = model.init_hidden(batch_size)   \n",
    "    y_pred = model(X_train) # Forward pass\n",
    "    \n",
    "    loss,grad = loss_fn(y_pred, y_train)\n",
    "    # Report progress every 100 epochs.\n",
    "    if t % 100 == 0:\n",
    "        print(\"Epoch \", t, \"MSE: \", loss)        \n",
    "    hist[t] = loss\n",
    "\n",
    "    optimizer.zero_grad()  # Zero out gradient, else they will accumulate between epochs \n",
    "    model.backward(grad,X_train)# Backward pass   \n",
    "    optimizer.step() # Update parameters\n",
    "    \n",
    "# y_pred here is the output of the final epoch's forward pass.\n",
    "plt.plot(y_pred, label=\"Preds\")\n",
    "plt.plot(y_train, label=\"Data\")\n",
    "plt.legend()\n",
    "plt.show()\n",
    "\n",
    "# Training loss per epoch.\n",
    "plt.plot(hist, label=\"Training loss\")\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.10.3 双向循环神经网络\n",
    "\n",
    "下面的RNNLayer表示了一个循环神经网络层。其构造函数的一个参数mode表示不同类型的循环神经网络单元，参数reverse表示这个神经网络层是正向的还是反向的。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Layers import *\n",
    "#from rnn import *\n",
    "class RNNLayer(Layer):\n",
    "    \"\"\"A single recurrent layer that runs one cell over a sequence in one direction.\n",
    "\n",
    "    mode selects the cell ('RNN_TANH', 'RNN_RELU', 'LSTM' or 'GRU'); with\n",
    "    reverse=True the time steps are visited from last to first. forward()\n",
    "    caches the per-step states in self.hs (and self.zs for LSTM/GRU) so that\n",
    "    backward() can reuse them.\n",
    "    \"\"\"\n",
    "    def __init__(self,mode,input_size, hidden_size,bias=True, batch_first=False,reverse=False):\n",
    "        super(RNNLayer, self).__init__()\n",
    "        self.mode = mode\n",
    "        if mode == 'RNN_TANH':\n",
    "            self.cell = RNNCell(input_size, hidden_size,bias,nonlinearity=\"tanh\")\n",
    "        elif mode == 'RNN_RELU':\n",
    "            self.cell = RNNCell(input_size, hidden_size,bias,nonlinearity=\"relu\")\n",
    "        elif mode == 'LSTM':\n",
    "            self.cell = LSTMCell(input_size, hidden_size,bias)\n",
    "        elif mode == 'GRU':\n",
    "            self.cell = GRUCell(input_size, hidden_size,bias)\n",
    "        else:\n",
    "            # Fail here instead of with an AttributeError on self.cell later.\n",
    "            raise ValueError(\"Unrecognized RNN mode: \" + str(mode))\n",
    "        self.reverse = reverse\n",
    "        self.batch_first = batch_first\n",
    "        # Caches written by forward() and read by backward().\n",
    "        self.hs = None\n",
    "        self.zs = None\n",
    "\n",
    "    def init_hidden(self, batch_size):\n",
    "        \"\"\"Ask the cell for a fresh initial state, store it in self.h and return it.\"\"\"\n",
    "        self.h = self.cell.init_hidden(batch_size)\n",
    "        return self.h\n",
    "\n",
    "    def forward(self,input,h=None,batch_sizes = None):\n",
    "        \"\"\"Apply the cell to every time step of input.\n",
    "\n",
    "        input is indexed per time step as input[t]; when batch_first is set\n",
    "        (and batch_sizes is None) its first two axes are swapped beforehand.\n",
    "        h is the initial state; the cell's own default is used when it is None.\n",
    "\n",
    "        Returns (outputs, h): the per-step states stacked in the order they\n",
    "        were computed, and the state produced by the last computed step.\n",
    "        \"\"\"\n",
    "        mode = self.mode\n",
    "        if self.batch_first and batch_sizes is None:\n",
    "            # ndarray.transpose(0, 1) rejects arrays with more than two axes,\n",
    "            # so swap the batch and time axes explicitly.\n",
    "            input = np.swapaxes(input, 0, 1)\n",
    "        seq_len,batch_size = input.shape[0], input.shape[1]\n",
    "        if h is None:\n",
    "            h = self.init_hidden(batch_size)\n",
    "        self.h = h\n",
    "\n",
    "        zs = []\n",
    "        hs = []\n",
    "        steps = range(seq_len - 1, -1, -1) if self.reverse else range(seq_len)\n",
    "        for t in steps:\n",
    "            h = self.cell(input[t], h)\n",
    "            if isinstance(h, tuple):\n",
    "                # The cell returned (state, intermediates).\n",
    "                h,z = h[0],h[1]\n",
    "                if mode == 'LSTM' or mode == 'GRU':\n",
    "                    zs.append(z)\n",
    "            hs.append(h)\n",
    "\n",
    "        self.hs = np.array(hs)\n",
    "        output = [step[0] if isinstance(step, tuple) else step for step in self.hs]\n",
    "        if mode == 'LSTM' or mode == 'GRU':\n",
    "            self.zs = np.array(zs)\n",
    "        return np.array(output),h\n",
    "\n",
    "    def __call__(self,input,h=None,batch_sizes = None):\n",
    "        return self.forward(input,h,batch_sizes)\n",
    "\n",
    "    def backward(self, dhs,input):\n",
    "        \"\"\"Back-propagate through time.\n",
    "\n",
    "        dhs holds one gradient per computed step (same length as the cached\n",
    "        states); input is the sequence that was given to forward(). Runs\n",
    "        forward() first when no states are cached yet.\n",
    "\n",
    "        Returns a list with the gradient w.r.t. each input step.\n",
    "        Raises ValueError when dhs and the cached states differ in length.\n",
    "\n",
    "        NOTE(review): forward() caches states in processing order while this\n",
    "        method indexes hs/zs by t; for reverse=True those orders differ -\n",
    "        confirm the indexing before relying on reversed gradients.\n",
    "        \"\"\"\n",
    "        if self.hs is None:\n",
    "            self.forward(input)  # fills self.hs / self.zs\n",
    "        hs = self.hs\n",
    "        # Cells without separate intermediates reuse the states themselves.\n",
    "        zs = self.zs if self.zs is not None else hs\n",
    "\n",
    "        if self.batch_first:\n",
    "            # Index the input per time step, as forward() does.\n",
    "            input = np.swapaxes(input, 0, 1)\n",
    "        seq_len,batch_size = input.shape[0], input.shape[1]\n",
    "        cell = self.cell\n",
    "\n",
    "        if len(dhs) != len(hs):\n",
    "            raise ValueError(\"dhs must hold one gradient per time step: got {} for {} steps\"\n",
    "                             .format(len(dhs), len(hs)))\n",
    "\n",
    "        dinput = [None for i in range(seq_len)]\n",
    "        steps = range(seq_len)   if self.reverse else range(seq_len - 1, -1, -1)\n",
    "        t0 = seq_len - 1 if self.reverse else 0\n",
    "        dh = np.zeros_like(dhs[0])  # gradient flowing in from the step processed after this one\n",
    "        for t in steps:\n",
    "            dh += dhs[t]  # add the gradient arriving directly at this step\n",
    "            h_pre = self.h if t==t0 else hs[t-1]\n",
    "            dx,dh,_ = cell.backward(dh,zs[t],input[t],h_pre)\n",
    "            dinput[t] = dx\n",
    "        return dinput"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试一下这个循环神经网络层类："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "output [[[ 0.98529853  0.54226394  0.87067973  0.11520828 -1.14269267\n",
      "    0.42582622]\n",
      "  [-0.13334087  0.04401171 -0.38530585  0.07464463 -0.20277516\n",
      "   -0.09243082]\n",
      "  [ 0.61308805  1.13157874 -0.37547481 -0.93738494 -0.72714967\n",
      "   -0.13471224]]\n",
      "\n",
      " [[ 0.12271983  0.72744884  0.07499603  0.19624746 -0.97130027\n",
      "    0.24047751]\n",
      "  [-0.10838374  0.36549895 -0.53044892  0.21920808 -0.41894674\n",
      "   -0.12161373]\n",
      "  [ 0.5756363   0.66192994  0.34478068 -0.41911888 -0.36571545\n",
      "   -0.21699069]]\n",
      "\n",
      " [[ 0.34313284  0.47791973 -0.07421252  0.20662352 -0.87125816\n",
      "   -0.02436402]\n",
      "  [ 0.09468831  0.31269736 -0.31467632  0.29748552 -0.54153618\n",
      "   -0.2947006 ]\n",
      "  [-0.02346966  0.63947901 -0.47209297 -0.04324441 -0.51834788\n",
      "   -0.01982571]]\n",
      "\n",
      " [[ 0.19181091  0.43937988  0.01072851  0.30992938 -0.64633399\n",
      "   -0.29797874]\n",
      "  [ 0.30171129  0.34000351  0.21123339  0.26620739  0.0670621\n",
      "   -0.44782343]\n",
      "  [ 0.30767592  0.60060431 -0.19535927 -0.0969966  -0.56337432\n",
      "    0.35244878]]\n",
      "\n",
      " [[ 0.11799502  0.29109034  0.43016514  0.03683311 -0.57986611\n",
      "   -0.17879448]\n",
      "  [ 0.36295293  0.64516397  0.239593    0.2867203  -0.0407612\n",
      "   -0.25947846]\n",
      "  [ 0.18901916  0.49459898 -0.4339055   0.08245769 -0.64307492\n",
      "   -0.02548672]]]\n",
      "hn [[ 0.11799502  0.29109034  0.43016514  0.03683311 -0.57986611 -0.17879448]\n",
      " [ 0.36295293  0.64516397  0.239593    0.2867203  -0.0407612  -0.25947846]\n",
      " [ 0.18901916  0.49459898 -0.4339055   0.08245769 -0.64307492 -0.02548672]]\n",
      "dinput: [array([[-0.23489668, -0.12348896, -0.05190273,  0.57847168],\n",
      "       [-0.15558714, -0.13020407, -0.05741823,  0.1094274 ],\n",
      "       [-0.14672895,  0.07799611, -0.21999369,  0.13940936]]), array([[ 0.13680377,  0.08637206,  0.21385975, -0.41469662],\n",
      "       [ 0.10127119,  0.17435545,  0.16505576, -0.03071139],\n",
      "       [ 0.06449638,  0.06019229,  0.01392103,  0.04667557]]), array([[ 0.12388398, -0.2657362 ,  0.0933225 , -0.35417794],\n",
      "       [-0.34105004,  0.74396999, -0.32571019,  0.36628756],\n",
      "       [ 0.15553805,  0.04390188,  0.0305535 ,  0.06263394]]), array([[-0.38271394,  0.2701076 , -0.90961594,  0.64856717],\n",
      "       [-0.08300746, -0.07048701, -0.03863535, -0.01807233],\n",
      "       [ 0.01510749, -0.05689009, -0.013502  , -0.03747616]]), array([[ 0.10931122,  0.10393491, -0.33652039,  0.15005094],\n",
      "       [-0.14981198, -0.86746276,  0.57690117, -0.07357825],\n",
      "       [ 0.18013041,  0.26644269,  0.04031855, -0.0781936 ]])]\n"
     ]
    }
   ],
   "source": [
    "# Smoke test for RNNLayer: one forward and one backward pass on random data.\n",
    "#test_LSTM=\"LSTM\"\n",
    "test_LSTM=\"GRU\"\n",
    "reverse = True\n",
    "np.random.seed(1)\n",
    "\n",
    "seq_len,batch_size,input_size,hidden_size = 5,3,4,6\n",
    "\n",
    "# Any value other than \"RNN_TANH\" or \"GRU\" selects the LSTM cell.\n",
    "cell_mode = test_LSTM if test_LSTM in (\"RNN_TANH\", \"GRU\") else \"LSTM\"\n",
    "rnn_ = RNNLayer(cell_mode,input_size, hidden_size,reverse = reverse)\n",
    "\n",
    "input  = np.random.randn(seq_len,batch_size,input_size)\n",
    "if reverse:\n",
    "    input = input[::-1]\n",
    "\n",
    "# Both states are always drawn, so the random stream is the same for every mode.\n",
    "h0 = np.random.randn(batch_size, hidden_size)\n",
    "c0 = np.random.randn(batch_size, hidden_size)\n",
    "\n",
    "# Only the LSTM receives a (hidden, cell) pair as its initial state.\n",
    "initial_state = (h0,c0) if test_LSTM==\"LSTM\" else h0\n",
    "output,hn= rnn_(input, initial_state)\n",
    "print(\"output\",output)\n",
    "print(\"hn\",hn)\n",
    "\n",
    "# Backward pass with a random upstream gradient of the output's shape.\n",
    "do = np.random.randn(*output.shape)\n",
    "dinput = rnn_.backward(do,input)\n",
    "print(\"dinput:\",dinput)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "在上述循环神经网络层的基础上，可以很方便地实现多层双向循环神经网络。下面的RNNBase_类实现了一个双向多层循环神经网络："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Layers  import *\n",
    "class RNNBase_(Layer):\n",
    "    \"\"\"A stack of RNNLayer objects: multi-layer and optionally bidirectional.\n",
    "\n",
    "    self.layers is laid out layer-major: index = layer * num_directions +\n",
    "    direction, where direction 0 is the forward layer and direction 1 the\n",
    "    reversed one. self.params / self.grads concatenate the parameters and\n",
    "    gradients of every layer's cell.\n",
    "    \"\"\"\n",
    "    __constants__ = ['mode', 'input_size', 'hidden_size', 'num_layers', 'bias',\n",
    "                     'batch_first', 'dropout', 'bidirectional']\n",
    "\n",
    "    def __init__(self, mode, input_size, hidden_size,\n",
    "                 num_layers=1, bias=True, batch_first=False,\n",
    "                 dropout=0., bidirectional=False):\n",
    "        super(RNNBase_, self).__init__()\n",
    "        # Validate first; plain ints such as dropout=0 are accepted, bools are not.\n",
    "        if (isinstance(dropout, bool) or not isinstance(dropout, (int, float))\n",
    "                or not 0 <= dropout <= 1):\n",
    "            raise ValueError(\"dropout should be a number in range [0, 1] \"\n",
    "                             \"representing the probability of an element being \"\n",
    "                             \"zeroed\")\n",
    "        if dropout > 0 and num_layers == 1:\n",
    "            import warnings  # local import: the cell does not import it itself\n",
    "            warnings.warn(\"dropout option adds dropout after all but last \"\n",
    "                          \"recurrent layer, so non-zero dropout expects \"\n",
    "                          \"num_layers greater than 1, but got dropout={} and \"\n",
    "                          \"num_layers={}\".format(dropout, num_layers))\n",
    "\n",
    "        self.mode = mode\n",
    "        self.input_size = input_size\n",
    "        self.hidden_size = hidden_size\n",
    "        self.num_layers = num_layers\n",
    "        self.bias = bias\n",
    "        self.batch_first = batch_first\n",
    "        self.dropout = float(dropout)  # stored only; forward() does not apply it\n",
    "        self.bidirectional = bidirectional\n",
    "        num_directions = 2 if bidirectional else 1\n",
    "        self.num_directions = num_directions\n",
    "\n",
    "        self.layers = []\n",
    "        self.params = []\n",
    "        self.grads = []\n",
    "        self._all_weights = []\n",
    "        self.hs = None  # per-layer outputs cached by forward()\n",
    "        for layer in range(num_layers):\n",
    "            # Only the first layer sees the raw input width.\n",
    "            layer_input_size = input_size if layer == 0 else hidden_size\n",
    "            for direction in range(num_directions):\n",
    "                rnnlayer = RNNLayer(mode,layer_input_size, hidden_size,reverse = (direction != 0))\n",
    "                self.layers.append(rnnlayer)\n",
    "\n",
    "                self.params+=   rnnlayer.cell.params\n",
    "                self.grads+=   rnnlayer.cell.grads\n",
    "\n",
    "    def init_hidden(self, batch_size):\n",
    "        \"\"\"Create an initial state for every layer; stores and returns them as the list self.h0.\"\"\"\n",
    "        self.h0 = [layer.init_hidden(batch_size) for layer in self.layers]\n",
    "        return self.h0\n",
    "\n",
    "    def forward(self,input,h=None,batch_sizes = None):\n",
    "        \"\"\"Run the input through all layers.\n",
    "\n",
    "        h is indexed per layer as h[l] (layer-major, see the class docstring);\n",
    "        init_hidden() supplies it when h is None.\n",
    "\n",
    "        Returns (output, hns): the top layer's output (or, when bidirectional,\n",
    "        the outputs of the top layer's two directions) and the final states of\n",
    "        all layers as an array.\n",
    "        \"\"\"\n",
    "        num_layers,num_directions = self.num_layers,self.num_directions\n",
    "        batch_size = input.shape[1]\n",
    "        if h is None:\n",
    "            h = self.init_hidden(batch_size)\n",
    "        self.h = h\n",
    "        hs = []\n",
    "        hns = []\n",
    "        for i in range(num_layers):\n",
    "            for j in range(num_directions):\n",
    "                l=  i*num_directions+j\n",
    "                # Upper layers read the same-direction output of the layer below.\n",
    "                x = input if i == 0 else hs[l-num_directions]\n",
    "                layer = self.layers[l]\n",
    "                # Shape trace; np.shape also copes with tuple-valued states.\n",
    "                print(i,j,x.shape,np.shape(h[l]))\n",
    "                output,hn = layer(x,h[l])\n",
    "                hs.append(output)\n",
    "                hns.append(hn)\n",
    "        self.hs = np.array(hs)\n",
    "        output = self.hs[-1] if num_directions==1 else self.hs[-num_directions:]\n",
    "        return output,np.array(hns)\n",
    "\n",
    "    def __call__(self,input,h=None,batch_sizes = None):\n",
    "        return self.forward(input,h,batch_sizes)\n",
    "\n",
    "    def backward(self, dhs,input):\n",
    "        \"\"\"Back-propagate from the top layer down to the input.\n",
    "\n",
    "        dhs is the gradient w.r.t. forward()'s output: one entry per direction\n",
    "        when bidirectional, otherwise a single gradient sequence.\n",
    "\n",
    "        Returns a list with one entry per direction, each being the first\n",
    "        layer's RNNLayer.backward() result for that direction.\n",
    "        \"\"\"\n",
    "        num_layers,num_directions = self.num_layers,self.num_directions\n",
    "        if self.hs is None:\n",
    "            # forward() caches every layer's states as a side effect.\n",
    "            self.forward(input)\n",
    "\n",
    "        dhs = [dhs[j] for j in range(num_directions)] if num_directions==2 else [dhs]\n",
    "        for i in reversed(range(num_layers)):\n",
    "            for j in range(num_directions):\n",
    "                l=  i*num_directions+j\n",
    "                layer = self.layers[l]\n",
    "                # A layer's input is the cached output of the layer below it.\n",
    "                x = input if i==0 else self.layers[l-num_directions].hs\n",
    "                dhs[j] = layer.backward(dhs[j],x)\n",
    "\n",
    "        return dhs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试一下这个RNNBase_类："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 0 (5, 3, 4) (3, 6)\n",
      "1 0 (5, 3, 6) (3, 6)\n",
      "output.shape (5, 3, 6)\n",
      "output [[[ 3.24486260e-01  6.56674130e-01  8.68435304e-01  5.09315941e-01\n",
      "    5.29784714e-01 -1.05821617e+00]\n",
      "  [-4.45235395e-01 -8.78507584e-04  4.82631427e-03  1.92588270e-02\n",
      "   -2.76288768e-01 -1.64919319e-01]\n",
      "  [-1.18117119e-01  6.58384960e-01 -5.01438389e-01  2.11906306e-01\n",
      "    7.13512787e-01 -1.10509111e-01]]\n",
      "\n",
      " [[ 1.28929605e-01  3.34295888e-01  8.06758802e-01 -1.71076528e-02\n",
      "    5.93009642e-01 -8.17782989e-01]\n",
      "  [-5.68243320e-01 -2.31040963e-01  8.08523747e-02 -6.52589763e-02\n",
      "    1.64704303e-01 -2.64589517e-01]\n",
      "  [ 2.41632617e-02  2.03820943e-01 -5.26430939e-01 -1.68229509e-01\n",
      "    5.49005106e-01 -6.66190458e-02]]\n",
      "\n",
      " [[-9.59243983e-02  1.39907427e-01  6.87706734e-01 -1.99017474e-01\n",
      "    6.20206686e-01 -6.20928098e-01]\n",
      "  [-5.70379783e-01 -2.27599285e-01  2.93622403e-02 -6.17578519e-02\n",
      "    3.58629218e-01 -1.63406067e-01]\n",
      "  [-1.32293952e-01 -8.84043163e-02 -4.81944935e-01 -1.72222810e-01\n",
      "    4.80312719e-01 -1.44052329e-01]]\n",
      "\n",
      " [[-2.33828461e-01 -7.94904476e-02  3.78104418e-01 -2.71385688e-01\n",
      "    5.63950924e-01 -5.28886447e-01]\n",
      "  [-5.89455407e-01 -3.06188324e-01  1.79936900e-01 -2.99958067e-02\n",
      "    4.63449151e-01 -2.46309325e-01]\n",
      "  [-3.21599178e-01 -2.70264480e-01 -2.88450946e-01 -1.01845976e-01\n",
      "    4.89243868e-01 -2.96653100e-01]]\n",
      "\n",
      " [[-3.37791446e-01 -1.35121176e-01  9.17178348e-02 -2.07543708e-01\n",
      "    5.00013950e-01 -3.60088562e-01]\n",
      "  [-6.07269374e-01 -3.49723084e-01  1.78905060e-01 -1.72220078e-02\n",
      "    5.12119093e-01 -2.48211954e-01]\n",
      "  [-4.24259363e-01 -2.86555458e-01 -1.68159396e-01 -4.35218931e-02\n",
      "    4.90992103e-01 -2.67618575e-01]]]\n",
      "dinput: [[array([[ 0.29715672,  0.36176309,  0.03057361, -0.15006458],\n",
      "       [-0.19732804,  0.07178479, -0.00349377, -0.04084217],\n",
      "       [ 0.12726689,  0.13620428, -0.00775011, -0.13197159]]), array([[ 0.25373234,  0.20271913,  0.01523221, -0.13262727],\n",
      "       [-0.17376956,  0.21947404,  0.16231807, -0.14760013],\n",
      "       [ 0.09063624,  0.01170992, -0.11187428,  0.02442987]]), array([[ 0.13731726,  0.05736737, -0.01935522,  0.07918607],\n",
      "       [-0.14284039,  0.15649727,  0.15710445, -0.10660712],\n",
      "       [-0.00252188, -0.0009455 ,  0.10236473, -0.13039081]]), array([[ 0.14238246, -0.02298069, -0.05433976,  0.04703113],\n",
      "       [-0.02046404,  0.017567  ,  0.08004208, -0.11585003],\n",
      "       [ 0.01844267, -0.0293929 ,  0.12016862, -0.18702739]]), array([[-0.03153968,  0.03676516, -0.04423979,  0.07732338],\n",
      "       [-0.00162189, -0.002211  ,  0.01068066, -0.00083353],\n",
      "       [ 0.06420445, -0.05281708,  0.05388969, -0.09821359]])]]\n"
     ]
    }
   ],
   "source": [
    "import numpy  as np\n",
    "# Smoke test for RNNBase_: a two-layer unidirectional network, forward then backward.\n",
    "np.random.seed(1)\n",
    "reverse = False\n",
    "num_layers = 2\n",
    "\n",
    "seq_len,batch_size,input_size,hidden_size = 5,3,4,6\n",
    "\n",
    "input = np.random.randn(seq_len,batch_size,input_size)\n",
    "test_LSTM = 'GRU'\n",
    "# Any value other than \"RNN_TANH\" or \"GRU\" selects the LSTM cell.\n",
    "cell_mode = test_LSTM if test_LSTM in (\"RNN_TANH\", \"GRU\") else \"LSTM\"\n",
    "rnn = RNNBase_(cell_mode,input_size,hidden_size,num_layers)\n",
    "\n",
    "# One initial state per layer.\n",
    "h_0 = np.random.randn(num_layers, batch_size, hidden_size)\n",
    "output, hn = rnn(input, h_0)\n",
    "print(\"output.shape\",output.shape)  #(seq_len,batch_size,hidden_size)\n",
    "print(\"output\",output)\n",
    "\n",
    "# Backward pass with a random upstream gradient of the output's shape.\n",
    "do = np.random.randn(*output.shape)\n",
    "dinput = rnn.backward(do,input)\n",
    "print(\"dinput:\",dinput)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.11 序列到序列（seq2seq）模型\n",
    "\n",
    "### 7.11.2 Seq2Seq模型的实现\n",
    "\n",
    "下面的编码器EncoderRNN就是一个GRU神经网络，和前面的GRU类唯一不同的就是添加了一个辅助函数 word2vec()，将单词索引序列word_indices_input转化为one-hot向量，EncoderRNN就是一个GRU，因此其构造函数的参数和GRU的参数是一样的，input_size, hidden_size分别表示输入数据的长度和隐状态向量的长度。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#from rnn import *\n",
    "def one_hot(size,indices,expend = False):\n",
    "    x =  np.eye(size)[indices.reshape(-1)]\n",
    "    if expend:\n",
    "        x = np.expand_dims(x, axis=1) \n",
    "    return x\n",
    "\n",
    "class EncoderRNN(object):\n",
    "    def __init__(self, input_size, hidden_size,num_layers = 1):\n",
    "        super(EncoderRNN, self).__init__()\n",
    "        self.input_size,self.hidden_size = input_size,hidden_size\n",
    "        self.num_layers = num_layers\n",
    "        #self.embedding = Embedding(input_size, hidden_size)\n",
    "        self.gru = GRU(input_size, hidden_size,1)\n",
    "\n",
    "    def word2vec(self,word_indices_input):\n",
    "        return one_hot(self.input_size,word_indices_input,True)  \n",
    "    \n",
    "    def forward(self, word_indices_input, hidden):\n",
    "        #self.encode_input = one_hot(self.input_size,word_indices_input,True) \n",
    "        self.encode_input =self.word2vec(word_indices_input)       \n",
    "        output, hidden = self.gru(self.encode_input, hidden)       \n",
    "        return output, hidden\n",
    "      \n",
    "    def __call__(self,word_indices_input, hidden):\n",
    "        return self.forward(word_indices_input, hidden)\n",
    "    \n",
    "    def initHidden(self,batch_size=1):       \n",
    "        return   np.zeros((self.num_layers, batch_size, self.hidden_size))      \n",
    "    \n",
    "    def parameters(self):\n",
    "        return self.gru.parameters()\n",
    " \n",
    "    def backward(self,dhs): \n",
    "        dinput,dhidden = self.gru.backward(dhs,self.encode_input) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "解码器DecoderRNN类的代码如下："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class  DecoderRNN(object):\n",
    "    def __init__(self,input_size,hidden_size, output_size,num_layers=1,teacher_forcing_ratio = 0.5):\n",
    "       # super(DecoderRNN, self).__init__()    \n",
    "        super().__init__()    \n",
    "        self.input_size = input_size\n",
    "        self.hidden_size = hidden_size       \n",
    "        self.num_layers = num_layers   \n",
    "        self.teacher_forcing_ratio = teacher_forcing_ratio\n",
    "        \n",
    "        self.gru = GRU(input_size,hidden_size,num_layers)\n",
    "        self.out = Dense(hidden_size, output_size)\n",
    "        \n",
    "        self.layers = [self.gru,self.out]        \n",
    "        self._params = None        \n",
    "\n",
    "    def initHidden(self,batch_size=1):      \n",
    "        self.h_0 =  np.zeros((self.num_layers, batch_size, self.hidden_size))\n",
    "    \n",
    "    def word2vec(self,input_t):\n",
    "        return one_hot(self.input_size,input_t,True)\n",
    "    \n",
    "    def forward_step(self, input_t, hidden):        \n",
    "        gru_input = self.word2vec(input_t)        \n",
    "        self.input.append(gru_input) \n",
    "        output_hs, hidden = self.gru(gru_input,hidden)\n",
    "        output = self.out(output_hs[0])        \n",
    "        return output,hidden,output_hs[0]     \n",
    "   \n",
    "    def forward(self,input_tensor,hidden):   \n",
    "        teacher_forcing_ratio = self.teacher_forcing_ratio\n",
    "        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False\n",
    "        #use_teacher_forcing = True\n",
    "        self.input = []        \n",
    "        \n",
    "        output_hs = []\n",
    "        output = []\n",
    "        hidden_t = hidden\n",
    "        h_0 = hidden.copy()\n",
    "        \n",
    "        input_t = np.array([SOS_token])\n",
    "        #input_seq = []       \n",
    "        hs = []\n",
    "        zs = []\n",
    "     \n",
    "        target_length = input_tensor.shape[0]\n",
    "        for t in range(target_length):\n",
    "           # input_seq.append(input_t[0])        \n",
    "           # print(\"type(input_t),input_t\",type(input_t),input_t)\n",
    "            \n",
    "            output_t, hidden_t,output_hs_t = self.forward_step(\n",
    "                input_t, hidden_t)\n",
    "           \n",
    "            #保存每一时刻的计算结果\n",
    "            hs.append(self.gru.hs) #隐状态\n",
    "            zs.append(self.gru.zs) #中间变量            \n",
    "            output_hs.append(output_hs_t)\n",
    "            output.append(output_t)\n",
    "           \n",
    "            if use_teacher_forcing:\n",
    "                input_t = input_tensor[t]  # Teacher forcing\n",
    "            else:             \n",
    "                input_t = np.argmax(output_t)  #最大概率\n",
    "                if input_t== EOS_token:\n",
    "                    break \n",
    "                input_t = np.array([input_t])\n",
    "         \n",
    "        output = np.array(output)\n",
    "        self.output_hs = np.array(output_hs)\n",
    "        self.h_0 = h_0         \n",
    "        self.hs = np.concatenate(hs, axis=1)\n",
    "        self.zs = np.concatenate(zs, axis=1)\n",
    "        \n",
    "        #self.input_seq = input_seq        \n",
    "        #return  output,input_seq\n",
    "        return  output    \n",
    "    \n",
    "    def __call__(self, input, hidden):\n",
    "        return self.forward(input, hidden)       \n",
    "    \n",
    "    def evaluate(self, hidden,max_length):\n",
    "        # input:(1, batch_size=1, input_size)  \n",
    "        input = np.array([SOS_token])\n",
    "        decoded_word_indices = []\n",
    "        for t in range(max_length):         \n",
    "            output,hidden,_ = self.forward_step(input, hidden)\n",
    "            output = np.argmax(output)         \n",
    "            if output==EOS_token:\n",
    "                break;\n",
    "            else:           \n",
    "                decoded_word_indices.append(output)\n",
    "                input = np.array([output])\n",
    "                \n",
    "        return decoded_word_indices\n",
    "        #return  indexToSentence(output_lang,decoded_words)\n",
    "        #return  indexToSentence(output_verb,decoded_words)       \n",
    "    \n",
    "    def backward(self,dZs):\n",
    "        dhs = []\n",
    "        #output_hs,input_seq = self.output_hs,self.input_seq       \n",
    "        output_hs = self.output_hs\n",
    "        input = np.concatenate(self.input,axis=0)        \n",
    "        \n",
    "        for i in range(len(input)):\n",
    "            self.out.x = output_hs[i]\n",
    "            dh = self.out.backward(dZs[i])\n",
    "            dhs.append(dh)\n",
    "        dhs = np.array(dhs)\n",
    "        \n",
    "        self.gru.hs = self.hs\n",
    "        self.gru.zs = self.zs\n",
    "        self.gru.h = self.h_0\n",
    "      \n",
    "        dinput,dhidden = self.gru.backward(dhs,input) \n",
    "        return dinput,dhidden \n",
    "    \n",
    "  #  def backward_dh(self,dZ):\n",
    "  #      dh = self.out.backward(dZ)\n",
    "  #      return dh \n",
    "    \n",
    "    def parameters(self):\n",
    "        if self._params is None:\n",
    "            self._params = []\n",
    "            for layer in self.layers:\n",
    "                for  i, _ in enumerate(layer.params):  \n",
    "                    self._params.append([layer.params[i],layer.grads[i]])  \n",
    "        return self._params"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面的函数train_step()接受输入的一对输入输出序列input_tensor和target_tensor，以及编码器、解码器及其优化器encoder, decoder, encoder_optimizer, decoder_optimizer，还有用于计算模型损失的函数loss_fn和正则项系数reg。train_step()对模型进行一次模型参数的训练更新，先根据input_tensor计算编码器的输出encoder_output, encoder_hidden，然后将最后时刻的隐状态encoder_hidden作为解码器的输入，并和target_tensor一起用于计算解码器最终的预测输出output。然后根据这个预测的输出output和target计算交叉熵损失和该损失关于output的梯度grad，接着用decoder.backward(grad)对解码器反向求导，输出的是关于编码器的输出encoder_hidden的梯度dhidden，根据这个梯度继续对编码器进行反向求导。最后更新模型参数。在更新模型参数前，可以用clip_grad_norm_nn对梯度进行裁剪，防止梯度爆炸。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_step(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, loss_fn,reg,max_length=0):\n",
    "    clip = 5.\n",
    "  #  encoder_hidden = encoder.initHidden()\n",
    "\n",
    "    encoder_optimizer.zero_grad()\n",
    "    decoder_optimizer.zero_grad()\n",
    "\n",
    "    input_length = input_tensor.shape[0] #input_tensor.size(0)\n",
    "    \n",
    "    loss = 0\n",
    "    encode_input = input_tensor    \n",
    "    encoder_output, encoder_hidden = encoder(encode_input, None)   \n",
    "    decoder_hidden = encoder_hidden\n",
    "    #output,input_seq = decoder(target_tensor, decoder_hidden) \n",
    "    output = decoder(target_tensor, decoder_hidden) \n",
    "    \n",
    "    target = target_tensor.reshape(-1,1)\n",
    "    if output.shape[0]!= target.shape[0]:       \n",
    "        target = target[:output.shape[0],:]      \n",
    "    loss,grad = loss_fn(output, target)    \n",
    "    loss /=(output.shape[0])\n",
    "    \n",
    "    dinput,dhidden = decoder.backward(grad)\n",
    "    encoder.backward(dhidden[0]) #,encode_input)\n",
    "  \n",
    "    if reg is not None:\n",
    "        loss+=encoder_optimizer.regularization(reg)\n",
    "        loss+=decoder_optimizer.regularization(reg)\n",
    "\n",
    "    util.clip_grad_norm_nn(encoder_optimizer.parameters(),clip,None)\n",
    "    util.clip_grad_norm_nn(decoder_optimizer.parameters(),clip,None)\n",
    "    \n",
    "    encoder_optimizer.step()\n",
    "    decoder_optimizer.step()\n",
    "    \n",
    "    return loss\n",
    "    #return loss.item() / target_length"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "迭代调用train_step()函数可以迭代地更新模型参数。函数trainIters()完成这个过程，并可以在迭代过程中输出一些中间训练结果，如训练误差和验证误差。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import time\n",
    "import math\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "def timeSince(start):\n",
    "    now = time.time()\n",
    "    s = now - start  \n",
    "    m = math.floor(s / 60)\n",
    "    s -= m * 60\n",
    "    return '%dm %ds' % (m, s)\n",
    "\n",
    "def trainIters(encoder, decoder, encoder_optimizer,decoder_optimizer,train_pairs,valid_pairs,print_every=1000, plot_every=100, reg =None, n_iters=None):\n",
    "    \"\"\"Train the encoder/decoder pair, one train_step() per training pair.\n",
    "\n",
    "    train_pairs: sequence of (input_tensor, target_tensor) pairs, used in order.\n",
    "    valid_pairs: pairs sampled by validation_loss() at every plot step.\n",
    "    print_every / plot_every: the running average loss is printed / plotted\n",
    "        every that many iterations.\n",
    "    reg: regularization coefficient handed to train_step() and validation_loss().\n",
    "    n_iters: number of iterations to run; defaults to len(train_pairs). It\n",
    "        must not exceed len(train_pairs).\n",
    "    \"\"\"\n",
    "    start = time.time()\n",
    "    valid_losses = []\n",
    "    plot_losses = []\n",
    "    print_loss_total = 0  # Reset every print_every\n",
    "    plot_loss_total = 0  # Reset every plot_every\n",
    "\n",
    "    training_pairs = train_pairs\n",
    "    if n_iters is None:\n",
    "        # Previously read from an undefined global; one pass over the pairs.\n",
    "        n_iters = len(training_pairs)\n",
    "\n",
    "    loss_fn =  util.rnn_loss_grad\n",
    "\n",
    "    for step in range(1, n_iters + 1):\n",
    "        pair = training_pairs[step - 1]\n",
    "        input_tensor,target_tensor = pair[0],pair[1]\n",
    "\n",
    "        loss = train_step(input_tensor, target_tensor, encoder,\n",
    "                     decoder, encoder_optimizer, decoder_optimizer, loss_fn,reg)\n",
    "        if loss is None: continue\n",
    "\n",
    "        print_loss_total += loss\n",
    "        plot_loss_total += loss\n",
    "\n",
    "        if step % print_every == 0:\n",
    "            print_loss_avg = print_loss_total / print_every\n",
    "            print_loss_total = 0\n",
    "            print('%s (%d %d%%) %.4f' % (timeSince(start),\n",
    "                                         step, step / n_iters * 100, print_loss_avg))\n",
    "\n",
    "        if step % plot_every == 0:\n",
    "            plot_loss_avg = plot_loss_total / plot_every\n",
    "            plot_losses.append(plot_loss_avg)\n",
    "            plot_loss_total = 0\n",
    "\n",
    "            plt.plot(plot_losses)\n",
    "\n",
    "            # Validation loss on 20 randomly drawn validation pairs.\n",
    "            valid_losses.append(validation_loss(encoder, decoder, valid_pairs,20,reg))\n",
    "            plt.plot(valid_losses)\n",
    "            plt.legend([\"train_losses\",\"valid_losses\"])\n",
    "            plt.show()\n",
    "\n",
    "def validation_loss(encoder, decoder, valid_pairs,validation_size = None,reg =None):\n",
    "    \"\"\"Return the mean per-row loss of the encoder/decoder over validation pairs.\n",
    "\n",
    "    Args:\n",
    "        encoder, decoder: models; the decoder must expose `teacher_forcing_ratio`.\n",
    "        valid_pairs: non-empty sequence of (input, target) pairs.\n",
    "        validation_size: when given, this many pairs are drawn at random (with\n",
    "            replacement) from valid_pairs instead of using all of them.\n",
    "        reg: when given, reg * sum(p**2) over all encoder and decoder parameters is\n",
    "            added to the loss of every pair.\n",
    "\n",
    "    The decoder's teacher_forcing_ratio is set to 1.1 for the duration of the call and\n",
    "    is always restored afterwards, even when a forward pass raises.\n",
    "    \"\"\"\n",
    "    import random  # imported locally: this cell's own import list does not include it\n",
    "\n",
    "    if validation_size is not None:\n",
    "        valid_pairs = [random.choice(valid_pairs) for _ in range(validation_size)]\n",
    "    loss_fn = util.rnn_loss_grad\n",
    "\n",
    "    # The penalty depends only on the parameters, not on the sample: compute it once.\n",
    "    reg_term = 0\n",
    "    if reg is not None:\n",
    "        params = encoder.parameters()+decoder.parameters()\n",
    "        reg_term = reg * sum(np.sum(p**2) for p, _ in params)\n",
    "\n",
    "    saved_ratio = decoder.teacher_forcing_ratio\n",
    "    # NOTE(review): a ratio above 1 presumably makes the decoder use teacher forcing on\n",
    "    # every step - confirm against DecoderRNN.\n",
    "    decoder.teacher_forcing_ratio = 1.1\n",
    "    total_loss = 0\n",
    "    try:\n",
    "        for pair in valid_pairs:\n",
    "            encode_input = pair[0]\n",
    "            target_tensor = pair[1]\n",
    "\n",
    "            _, encoder_hidden = encoder(encode_input, None)\n",
    "            output = decoder(target_tensor, encoder_hidden)\n",
    "\n",
    "            target = target_tensor.reshape(-1,1)\n",
    "            # Trim the target when the decoder returned a different number of rows.\n",
    "            if output.shape[0] != target.shape[0]:\n",
    "                target = target[:output.shape[0],:]\n",
    "            loss, _ = loss_fn(output, target)\n",
    "            loss /= output.shape[0]\n",
    "\n",
    "            total_loss += loss + reg_term\n",
    "    finally:\n",
    "        decoder.teacher_forcing_ratio = saved_ratio\n",
    "    return total_loss/len(valid_pairs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.11.3 字符级机器翻译的Seq2Seq\n",
    "\n",
    "类ChVerb用于表示一种语言的字符单词表，即其中记录了有哪些字符、每个字符和其在字符单词表中的索引的对应关系。其中用'\\t','\\n'分别表示特殊的开始字符和结束字符，它们对应的单词表索引分别为0和1。\n",
    "\n",
    "#### 1. 字符单词表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SOS_token = 0\n",
    "EOS_token = 1\n",
    "\n",
    "class ChVerb:\n",
    "    \"\"\"Character vocabulary of one language: a two-way map between characters and indices.\n",
    "\n",
    "    The tab character (index 0) and the newline character (index 1) are registered up\n",
    "    front as the start and end markers.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, name):\n",
    "        self.name = name\n",
    "        # Indices 0 and 1 are reserved for the start and end markers.\n",
    "        self.char2index = {'\\t': 0, '\\n': 1}\n",
    "        self.index2char = {index: char for char, index in self.char2index.items()}\n",
    "        self.n_chars = len(self.char2index)\n",
    "\n",
    "    def addChars(self, chars):\n",
    "        \"\"\"Register every character of the iterable `chars`.\"\"\"\n",
    "        for ch in chars:\n",
    "            self.addChar(ch)\n",
    "\n",
    "    def addChar(self, char):\n",
    "        \"\"\"Give `char` the next free index unless it is already known.\"\"\"\n",
    "        if char in self.char2index:\n",
    "            return\n",
    "        new_index = self.n_chars\n",
    "        self.char2index[char] = new_index\n",
    "        self.index2char[new_index] = char\n",
    "        self.n_chars = new_index + 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 2. 读取训练样本、构建字符单词表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import random\n",
    "import re\n",
    "import unicodedata\n",
    "random.seed(1)\n",
    "\n",
    "# Normalize every sentence:\n",
    "# - lower-case it and strip surrounding whitespace,\n",
    "# - drop accents (Unicode combining marks),\n",
    "# - put a space in front of '.', '!' and '?',\n",
    "# - replace every run of other non-letter characters with a single space.\n",
    "\n",
    "def unicodeToAscii(sentence):\n",
    "    \"\"\"Return `sentence` NFD-decomposed with every combining mark (category 'Mn') removed.\"\"\"\n",
    "    decomposed = unicodedata.normalize('NFD', sentence)\n",
    "    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']\n",
    "    return ''.join(kept)\n",
    "\n",
    "def normalize_sentence(sentence):\n",
    "    \"\"\"Lower-case and trim `sentence`, strip accents, and keep only letters and '.', '!', '?'.\n",
    "\n",
    "    Each '.', '!' or '?' gets a space in front of it, and every run of other\n",
    "    non-letter characters collapses to a single space.\n",
    "    \"\"\"\n",
    "    cleaned = sentence.lower().strip()\n",
    "    cleaned = unicodeToAscii(cleaned)\n",
    "    spaced = re.sub(r\"([.!?])\", r\" \\1\", cleaned)\n",
    "    return re.sub(r\"[^a-zA-Z.!?]+\", r\" \", spaced)\n",
    "\n",
    "\n",
    "# Exclusive upper bound on the number of space-separated words per sentence, referenced\n",
    "# by filterPair's word-count check. NOTE: a later cell rebinds MAX_LENGTH to 60.\n",
    "MAX_LENGTH  =10\n",
    "\n",
    "# Sentence prefixes referenced by filterPair's startswith() check on p[1].\n",
    "eng_prefixes = (\n",
    "    \"i am \", \"i m \",\n",
    "    \"he is\", \"he s \",\n",
    "    \"she is\", \"she s \",\n",
    "    \"you are\", \"you re \",\n",
    "    \"we are\", \"we re \",\n",
    "    \"they are\", \"they re \"\n",
    ")\n",
    "\n",
    "def filterPair(p, apply_filter=False):\n",
    "    \"\"\"Decide whether the sentence pair `p` should be kept.\n",
    "\n",
    "    By default every pair is kept. With apply_filter=True a pair is kept only when both\n",
    "    sentences have fewer than MAX_LENGTH space-separated words and p[1] starts with one\n",
    "    of eng_prefixes.\n",
    "    \"\"\"\n",
    "    if not apply_filter:\n",
    "        return True\n",
    "    return (len(p[0].split(' ')) < MAX_LENGTH and\n",
    "            len(p[1].split(' ')) < MAX_LENGTH and\n",
    "            p[1].startswith(eng_prefixes))\n",
    "\n",
    "\n",
    "def filterPairs(pairs):\n",
    "    \"\"\"Return the pairs that filterPair() accepts, in their original order.\"\"\"\n",
    "    return list(filter(filterPair, pairs))\n",
    "\n",
    "\n",
    "def prepareData(lang_from, lang_to, reverse=False, data_dir='data'):\n",
    "    \"\"\"Read tab-separated sentence pairs and build one character vocabulary per side.\n",
    "\n",
    "    The file read is '<data_dir>/<lang_to>-<lang_from>/<lang_to>.txt'. Only the first two\n",
    "    tab-separated fields of a line are used, each passed through normalize_sentence();\n",
    "    lines with fewer than two fields are skipped.\n",
    "\n",
    "    Args:\n",
    "        lang_from, lang_to: language names, used for the file path and vocabulary names.\n",
    "        reverse: when True every pair is reversed and the vocabulary names are swapped.\n",
    "        data_dir: directory that contains the '<lang_to>-<lang_from>' folder.\n",
    "\n",
    "    Returns:\n",
    "        (in_verb, out_verb, pairs): the vocabulary built from pair[0], the vocabulary\n",
    "        built from pair[1], and the list of pairs accepted by filterPairs().\n",
    "    \"\"\"\n",
    "    # With reverse=True the two sides swap, so the vocabulary names swap with them.\n",
    "    in_name, out_name = (lang_to, lang_from) if reverse else (lang_from, lang_to)\n",
    "    in_verb, out_verb = ChVerb(in_name), ChVerb(out_name)\n",
    "\n",
    "    data_path = '%s/%s-%s/%s.txt' % (data_dir, lang_to, lang_from, lang_to)\n",
    "    pairs = []\n",
    "    with open(data_path, 'r', encoding='utf-8') as f:\n",
    "        for line in f.read().strip().split('\\n'):\n",
    "            fields = line.split('\\t')\n",
    "            if len(fields) < 2:\n",
    "                # No second field to pair with: such a line would crash on pair[1] below.\n",
    "                continue\n",
    "            pair = [normalize_sentence(s) for s in fields[:2]]\n",
    "            pairs.append(pair[::-1] if reverse else pair)\n",
    "\n",
    "    pairs = filterPairs(pairs)\n",
    "    for pair in pairs:\n",
    "        in_verb.addChars(pair[0])\n",
    "        out_verb.addChars(pair[1])\n",
    "\n",
    "    return in_verb, out_verb, pairs\n",
    "\n",
    "\n",
    "# Build both vocabularies and the sentence pairs; in_verb, out_verb and pairs are\n",
    "# notebook-level names that later cells read.\n",
    "in_verb, out_verb, pairs = prepareData( 'eng','fra',True) # third argument is reverse\n",
    "\n",
    "print(\"Read %s sentence pairs\" % len(pairs))  \n",
    "print(\"Counted chars:\")\n",
    "print(in_verb.name, in_verb.n_chars)\n",
    "print(out_verb.name, out_verb.n_chars)\n",
    "# Print five randomly chosen pairs, then the pair at index 3.\n",
    "for i in range(5):\n",
    "    print(random.choice(pairs))\n",
    "print(pairs[3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面的代码实现训练样本句子中的字符与其在字符单词表中的索引之间的相互转换："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def indexToSentence(verb, indexes):\n",
    "    \"\"\"Map every index in `indexes` through verb.index2char and join the characters.\"\"\"\n",
    "    return ''.join(verb.index2char[index] for index in indexes)\n",
    "\n",
    "def indexesFromSentence(verb, sentence):\n",
    "    \"\"\"Return the verb.char2index entry of each character of `sentence`, in order.\"\"\"\n",
    "    lookup = verb.char2index\n",
    "    indexes = []\n",
    "    for ch in sentence:\n",
    "        indexes.append(lookup[ch])\n",
    "    return indexes\n",
    "    \n",
    "def tensorFromSentence(verb, sentence):\n",
    "    \"\"\"Encode `sentence` as a column of vocabulary indices terminated by EOS_token.\n",
    "\n",
    "    Returns a NumPy array of shape (len(sentence) + 1, 1).\n",
    "    \"\"\"\n",
    "    with_eos = indexesFromSentence(verb, sentence) + [EOS_token]\n",
    "    column = np.array(with_eos)\n",
    "    return column.reshape(-1, 1)\n",
    "\n",
    "def tensorsFromPair(pair):\n",
    "    \"\"\"Encode pair[0] with the global in_verb and pair[1] with the global out_verb.\n",
    "\n",
    "    Returns the tuple (input_tensor, target_tensor).\n",
    "    \"\"\"\n",
    "    return (tensorFromSentence(in_verb, pair[0]),\n",
    "            tensorFromSentence(out_verb, pair[1]))\n",
    "\n",
    "# Sanity check: encode one fixed pair and show the two resulting index columns.\n",
    "print(pairs[3])\n",
    "en_input, de_target = tensorsFromPair(pairs[3]) # swap in random.choice(pairs) for a random pair\n",
    "\n",
    "# Shapes first, then the raw index values.\n",
    "print(en_input.shape)\n",
    "print(de_target.shape)\n",
    "print(en_input)\n",
    "print(de_target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 3. 训练字符级的Seq2Seq模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from train import *\n",
    "from Layers import *\n",
    "from rnn import *\n",
    "import util\n",
    "\n",
    "# Seed NumPy so the shuffle/split below gives the same result on every run.\n",
    "# NOTE(review): this presumably also fixes the models' initial weights if the layers\n",
    "# draw them from np.random - confirm in Layers/rnn.\n",
    "np.random.seed(1)\n",
    "\n",
    "# Model size.\n",
    "hidden_size = 50 #256\n",
    "num_layers = 1\n",
    "\n",
    "# Optimisation settings; `clip` is read as a notebook-level name by the training step.\n",
    "clip = 5.#50.\n",
    "learning_rate = 0.1\n",
    "decoder_learning_ratio = 1.0\n",
    "teacher_forcing_ratio =0.5\n",
    "\n",
    "encoder = EncoderRNN(in_verb.n_chars, hidden_size)\n",
    "decoder = DecoderRNN(out_verb.n_chars,hidden_size,out_verb.n_chars,num_layers,teacher_forcing_ratio)\n",
    "\n",
    "momentum = 0.5\n",
    "decay_every  =1000\n",
    "encoder_optimizer = SGD(encoder.parameters(), learning_rate, momentum,decay_every)\n",
    "decoder_optimizer = SGD(decoder.parameters(), learning_rate*decoder_learning_ratio, momentum,decay_every)\n",
    "\n",
    "reg= None#1e-2\n",
    "\n",
    "# Work on at most MAX_PAIRS pairs; the slice is a new list, so the shuffle below does\n",
    "# not reorder the list this name was bound to before.\n",
    "MAX_PAIRS = 80000\n",
    "TRAIN_FRACTION = 0.98\n",
    "pairs = pairs[:MAX_PAIRS]\n",
    "\n",
    "np.random.shuffle(pairs)\n",
    "train_n = int(len(pairs) * TRAIN_FRACTION)\n",
    "train_pairs = pairs[:train_n]\n",
    "valid_pairs = pairs[train_n:]\n",
    "\n",
    "# n_iters stays a notebook-level name because trainIters() reads it.\n",
    "n_iters = 50000\n",
    "print_every, plot_every = 100,100  #10,10\n",
    "# One randomly drawn (with replacement) training pair per iteration, encoded up front.\n",
    "idx_train_pairs = [tensorsFromPair(random.choice(train_pairs)) for i in range(n_iters)]\n",
    "idx_valid_pairs = [tensorsFromPair(pair) for pair in valid_pairs]\n",
    "trainIters(encoder, decoder,encoder_optimizer,decoder_optimizer,idx_train_pairs,idx_valid_pairs,print_every, plot_every,reg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以用训练好的模型进行语言翻译：将待翻译语言的单词序列（句子）输入到编码器，产生一个上下文信息；该信息输入到解码器，解码器根据初始时刻的输入SOS和这个上下文信息产生翻译后的单词序列（句子）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "MAX_LENGTH = 60\n",
    "def evaluate(encoder, decoder,in_vocab,out_vocab,sentence, max_length=MAX_LENGTH):\n",
    "    \"\"\"Translate `sentence` with the given encoder/decoder and return the output string.\n",
    "\n",
    "    The sentence is encoded with in_vocab and run through the encoder; the encoder's\n",
    "    hidden state is handed to decoder.evaluate() together with max_length, and the\n",
    "    indices it returns are decoded with out_vocab.\n",
    "    \"\"\"\n",
    "    source_indices = tensorFromSentence(in_vocab, sentence)\n",
    "    _, context = encoder(source_indices, None)\n",
    "    predicted_indices = decoder.evaluate(context, max_length)\n",
    "    return indexToSentence(out_vocab, predicted_indices)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "随机选择几个输入句子，用evaluate预测翻译的句子："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw three random positions in `pairs` and show each pair next to the model's output.\n",
    "indices = np.random.randint(len(pairs), size=3)\n",
    "for i in indices:\n",
    "    pair = pairs[i]\n",
    "    print(pair)\n",
    "    sentence = pair[0]   \n",
    "    # `sentence` is reused: input text before the call, model output after it.\n",
    "    sentence = evaluate(encoder, decoder,in_verb,out_verb, sentence)\n",
    "    print(sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
